// Source listing: /aosp_15_r20/external/XNNPACK/test/qc8-igemm-minmax-fp32-3.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qc8-igemm-minmax-fp32.yaml
//   Generator: tools/generate-gemm-test.py
12 
13 
14 #include <gtest/gtest.h>
15 
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20 
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25 
26 
27 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// k_eq_8: runs the cortex_a7 1x8 microkernel once with m=1, n=8, k=8
// (mr=1, nr=8, kr=1, sr=1). Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
40 
// strided_cn: same m=1, n=8, k=8 case as k_eq_8, but with cn_stride set to 11
// instead of the tester's default. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
54 
// k_eq_8_subtile: k=8 with every (m, n) pair for n in 1..8 and m in 1..1,
// one tester iteration per pair. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    // The m loop has a single iteration because its upper bound is 1.
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
72 
// k_eq_8_subtile_m: n=8, k=8 while m sweeps 1..1 (a single value here),
// one tester iteration each. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
88 
// k_eq_8_subtile_n: m=1, k=8 while n sweeps 1..8, one tester iteration each.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
104 
// k_lt_8: m=1, n=8 with k sweeping 1..7 (every k below 8).
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
119 
// k_lt_8_subtile: k in 1..7 crossed with n in 1..8 and m in 1..1,
// one tester iteration per combination. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      // The m loop has a single iteration because its upper bound is 1.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
139 
// k_gt_8: m=1, n=8 with k sweeping 9..15.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
154 
// k_gt_8_subtile: k in 9..15 crossed with n in 1..8 and m in 1..1,
// one tester iteration per combination. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      // The m loop has a single iteration because its upper bound is 1.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
174 
// k_div_8: m=1, n=8 with k taking multiples of 8 from 16 through 80.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
189 
// k_div_8_subtile: k in {16, 24, ..., 80} crossed with n in 1..8 and m in 1..1,
// one tester iteration per combination. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      // The m loop has a single iteration because its upper bound is 1.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
209 
// n_gt_8: m=1 with n sweeping 9..15 and k in {1, 10, 19, 28, 37}.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
226 
// n_gt_8_strided_cn: n in 9..15 and k in {1, 10, 19, 28, 37} with m=1 and
// cn_stride set to 11. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
244 
// n_gt_8_subtile: n in 9..15, k in {1, 10, 19, 28, 37}, m in 1..1,
// one tester iteration per combination. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      // The m loop has a single iteration because its upper bound is 1.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
264 
// n_div_8: m=1 with n in {16, 24} and k in {1, 10, 19, 28, 37}.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
281 
// n_div_8_strided_cn: n in {16, 24} and k in {1, 10, 19, 28, 37} with m=1 and
// cn_stride set to 11. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
299 
// n_div_8_subtile: n in {16, 24}, k in {1, 10, 19, 28, 37}, m in 1..1,
// one tester iteration per combination. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      // The m loop has a single iteration because its upper bound is 1.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
319 
// small_kernel: m=1, n=8 with ks set to 3 and k in {1, 10, 19, 28, 37}.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
335 
// small_kernel_subtile: ks=3 with k in {1, 10, 19, 28, 37}, n in 1..8 and
// m in 1..1, one tester iteration per combination.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      // The m loop has a single iteration because its upper bound is 1.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
356 
// n_gt_8_small_kernel: ks=3, m=1 with n in 9..15 and k in {1, 10, 19, 28, 37}.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
374 
// n_div_8_small_kernel: ks=3, m=1 with n in {16, 24} and
// k in {1, 10, 19, 28, 37}. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
392 
// strided_cm_subtile: cm_stride=11 with k in {1, 10, 19, 28, 37}, n in 1..8
// and m in 1..1, one tester iteration per combination.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      // The m loop has a single iteration because its upper bound is 1.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
413 
// a_offset: m=1, n=8, ks=3 with a_offset set to 43 and
// k in {1, 10, 19, 28, 37}. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
430 
// zero: same setup as a_offset (ks=3, a_offset=43, k in {1, 10, 19, 28, 37})
// plus zero_index(mz) for mz in 0..0. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    // The mz loop has a single iteration (mz == 0) because its bound is 1.
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
450 
// qmin: m=1, n=8, k=8 with qmin set to 128 instead of the tester's default.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
464 
// qmax: m=1, n=8, k=8 with qmax set to 128 instead of the tester's default.
// Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
478 
// strided_cm: m=1, n=8, k=8 with cm_stride set to 11 instead of the tester's
// default. Gated by TEST_REQUIRES_ARM_NEON.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
492 #endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
493 
494 
495 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// k_eq_8: runs the cortex_a35 1x8 microkernel once with m=1, n=8, k=8
// (mr=1, nr=8, kr=1, sr=1). Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
508 
// strided_cn: same m=1, n=8, k=8 case as k_eq_8, but with cn_stride set to 11
// instead of the tester's default. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
522 
// k_eq_8_subtile: k=8 with every (m, n) pair for n in 1..8 and m in 1..1,
// one tester iteration per pair. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    // The m loop has a single iteration because its upper bound is 1.
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
540 
// k_eq_8_subtile_m: n=8, k=8 while m sweeps 1..1 (a single value here),
// one tester iteration each. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
556 
// k_eq_8_subtile_n: m=1, k=8 while n sweeps 1..8, one tester iteration each.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
572 
// k_lt_8: m=1, n=8 with k sweeping 1..7 (every k below 8).
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
587 
// k_lt_8_subtile: k in 1..7 crossed with n in 1..8 and m in 1..1,
// one tester iteration per combination. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      // The m loop has a single iteration because its upper bound is 1.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
607 
// k_gt_8: m=1, n=8 with k sweeping 9..15.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
622 
// k_gt_8_subtile: k in 9..15 crossed with n in 1..8 and m in 1..1,
// one tester iteration per combination. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      // The m loop has a single iteration because its upper bound is 1.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
642 
// k_div_8: m=1, n=8 with k taking multiples of 8 from 16 through 80.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
657 
// k_div_8_subtile: k in {16, 24, ..., 80} crossed with n in 1..8 and m in 1..1,
// one tester iteration per combination. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      // The m loop has a single iteration because its upper bound is 1.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
677 
// n_gt_8: m=1 with n sweeping 9..15 and k in {1, 10, 19, 28, 37}.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
694 
// n_gt_8_strided_cn: n in 9..15 and k in {1, 10, 19, 28, 37} with m=1 and
// cn_stride set to 11. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
712 
// n_gt_8_subtile: n in 9..15, k in {1, 10, 19, 28, 37}, m in 1..1,
// one tester iteration per combination. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      // The m loop has a single iteration because its upper bound is 1.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
732 
// n_div_8: m=1 with n in {16, 24} and k in {1, 10, 19, 28, 37}.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
749 
// n_div_8_strided_cn: n in {16, 24} and k in {1, 10, 19, 28, 37} with m=1 and
// cn_stride set to 11. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
767 
// n_div_8_subtile: n in {16, 24}, k in {1, 10, 19, 28, 37}, m in 1..1,
// one tester iteration per combination. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      // The m loop has a single iteration because its upper bound is 1.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
787 
// small_kernel: m=1, n=8 with ks set to 3 and k in {1, 10, 19, 28, 37}.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
803 
// small_kernel_subtile: ks=3 with k in {1, 10, 19, 28, 37}, n in 1..8 and
// m in 1..1, one tester iteration per combination.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      // The m loop has a single iteration because its upper bound is 1.
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
824 
// n_gt_8_small_kernel: ks=3, m=1 with n in 9..15 and k in {1, 10, 19, 28, 37}.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
842 
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n takes the values 16 and 24 with ks(3); for each n, k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
860 
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Every (m, n) with m in [1, 1] and n in [1, 8], cm_stride(11), one iteration each;
  // k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
881 
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m=1, n=8 with ks(3) and a_offset(43); k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
898 
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Same configuration as the a_offset case, additionally passing each
  // zero_index in [0, 1); k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
918 
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run at m=1, n=8, k=8 with qmin(128).
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
932 
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run at m=1, n=8, k=8 with qmax(128).
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
946 
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run at m=1, n=8, k=8 with cm_stride(11).
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
960 #endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
961 
962 
963 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single run at m=4, n=8, k=8.
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
976 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Single run at m=4, n=8, k=8 with cn_stride(11).
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
990 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every (m, n) with n in [1, 8] and m in [1, 4] at k=8, one iteration each.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1008 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // m runs over 1..4 at n=8, k=8, one iteration each.
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1024 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // n runs over 1..8 at m=4, k=8, one iteration each.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1040 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  // k runs over 1..7 at m=4, n=8.
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1055 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For k in 1..7: every (m, n) with n in [1, 8] and m in [1, 4], one iteration each.
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1075 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // k runs over 9..15 at m=4, n=8.
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1090 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For k in 9..15: every (m, n) with n in [1, 8] and m in [1, 4], one iteration each.
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1110 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // k takes the multiples of 8 from 16 through 80 at m=4, n=8.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1125 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // For k = 16, 24, ..., 80: every (m, n) with n in [1, 8] and m in [1, 4],
  // one iteration each.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1145 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // n runs over 9..15 at m=4; for each n, k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1162 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n runs over 9..15 at m=4 with cn_stride(11); for each n, k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1180 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n runs over 9..15; for each n, k takes 1, 10, 19, 28, 37 and m runs over 1..4,
  // one iteration each.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1200 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // n takes the values 16 and 24 at m=4; for each n, k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1217 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n takes the values 16 and 24 at m=4 with cn_stride(11); for each n,
  // k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1235 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n takes the values 16 and 24; for each n, k takes 1, 10, 19, 28, 37 and
  // m runs over 1..4, one iteration each.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1255 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m=4, n=8 with ks(3); k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1271 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every (m, n) with n in [1, 8] and m in [1, 4], ks(3), one iteration each;
  // k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1292 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n runs over 9..15 at m=4 with ks(3); for each n, k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1310 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n takes the values 16 and 24 at m=4 with ks(3); for each n,
  // k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1328 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every (m, n) with n in [1, 8] and m in [1, 4], cm_stride(11), one iteration each;
  // k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1349 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // m=4, n=8 with ks(3) and a_offset(163); k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1366 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  // Same configuration as the a_offset case, additionally passing each
  // zero_index in [0, 4); k takes the values 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1386 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single run at m=4, n=8, k=8 with qmin(128).
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1400 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single run at m=4, n=8, k=8 with qmax(128).
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1414 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Single run at m=4, n=8, k=8 with cm_stride(11).
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1428 #endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1429 
1430 
1431 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run at m=4, n=8, k=8.
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
1444 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run at m=4, n=8, k=8 with cn_stride(11).
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
1458 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Every (m, n) with n in [1, 8] and m in [1, 4] at k=8, one iteration each.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1476 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m runs over 1..4 at n=8, k=8, one iteration each.
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1492 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n runs over 1..8 at m=4, k=8, one iteration each.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1508 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k runs over 1..7 at m=4, n=8.
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1523 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For k in 1..7: every (m, n) with n in [1, 8] and m in [1, 4], one iteration each.
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1543 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k runs over 9..15 at m=4, n=8.
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1558 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For k in 9..15: every (m, n) with n in [1, 8] and m in [1, 4], one iteration each.
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1578 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k takes the multiples of 8 from 16 through 80 at m=4, n=8.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1593 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // For k = 16, 24, ..., 80: every (m, n) with n in [1, 8] and m in [1, 4],
  // one iteration each.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1613 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n runs over 9..15 at m=4; for each n, k takes 1, 10, 19, 28, 37.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1630 
// n_gt_8_strided_cn: n in [9, 15], k = 1, 10, 19, 28, 37, with cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1648 
// n_gt_8_subtile: n in [9, 15], k = 1, 10, 19, 28, 37, every m in [1, 4];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1668 
// n_div_8: n = 16 and 24 with m = 4, k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1685 
// n_div_8_strided_cn: n = 16 and 24, k = 1, 10, 19, 28, 37, with cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1703 
// n_div_8_subtile: n = 16 and 24, k = 1, 10, 19, 28, 37, every m in [1, 4];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1723 
// small_kernel: full 4x8 tile, k = 1, 10, 19, 28, 37, with ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1739 
// small_kernel_subtile: k = 1, 10, 19, 28, 37, every n in [1, 8] and m in
// [1, 4], with ks = 3; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1760 
// n_gt_8_small_kernel: n in [9, 15], k = 1, 10, 19, 28, 37, with ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1778 
// n_div_8_small_kernel: n = 16 and 24, k = 1, 10, 19, 28, 37, with ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1796 
// strided_cm_subtile: k = 1, 10, 19, 28, 37, every n in [1, 8] and m in
// [1, 4], with cm_stride = 11; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1817 
// a_offset: full 4x8 tile, k = 1, 10, 19, 28, 37, ks = 3, a_offset = 163.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1834 
// zero: full 4x8 tile, k = 1, 10, 19, 28, 37, ks = 3, a_offset = 163, with
// zero_index stepping through 0..3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1854 
// qmin: full 4x8 tile, k = 8, with qmin = 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1868 
// qmax: full 4x8 tile, k = 8, with qmax = 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1882 
// strided_cm: full 4x8 tile, k = 8, with cm_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1896 #endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1897 
1898 
1899 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// k_eq_8: single run on a full 4x8 tile with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1912 
// strided_cn: full 4x8 tile, k = 8, with cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1926 
// k_eq_8_subtile: k = 8 with every n in [1, 8] and m in [1, 4]; one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1944 
// k_eq_8_subtile_m: k = 8, n = 8, every m in [1, 4]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m_size).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1960 
// k_eq_8_subtile_n: k = 8, m = 4, every n in [1, 8]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1976 
// k_lt_8: full 4x8 tile with every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1991 
// k_lt_8_subtile: every k in [1, 7], n in [1, 8] and m in [1, 4]; one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2011 
// k_gt_8: full 4x8 tile with every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2026 
// k_gt_8_subtile: every k in [9, 15], n in [1, 8] and m in [1, 4]; one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2046 
// k_div_8: full 4x8 tile with k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2061 
// k_div_8_subtile: k = 16, 24, ..., 80 crossed with every n in [1, 8] and
// every m in [1, 4]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2081 
// n_gt_8: n in [9, 15] with m = 4, k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2098 
// n_gt_8_strided_cn: n in [9, 15], k = 1, 10, 19, 28, 37, with cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2116 
// n_gt_8_subtile: n in [9, 15], k = 1, 10, 19, 28, 37, every m in [1, 4];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2136 
// n_div_8: n = 16 and 24 with m = 4, k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2153 
// n_div_8_strided_cn: n = 16 and 24, k = 1, 10, 19, 28, 37, with cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2171 
// n_div_8_subtile: n = 16 and 24, k = 1, 10, 19, 28, 37, every m in [1, 4];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2191 
// small_kernel: full 4x8 tile, k = 1, 10, 19, 28, 37, with ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2207 
// small_kernel_subtile: k = 1, 10, 19, 28, 37, every n in [1, 8] and m in
// [1, 4], with ks = 3; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2228 
// n_gt_8_small_kernel: n in [9, 15], k = 1, 10, 19, 28, 37, with ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2246 
// n_div_8_small_kernel: n = 16 and 24, k = 1, 10, 19, 28, 37, with ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2264 
// strided_cm_subtile: k = 1, 10, 19, 28, 37, every n in [1, 8] and m in
// [1, 4], with cm_stride = 11; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2285 
// a_offset: full 4x8 tile, k = 1, 10, 19, 28, 37, ks = 3, a_offset = 163.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2302 
// zero: full 4x8 tile, k = 1, 10, 19, 28, 37, ks = 3, a_offset = 163, with
// zero_index stepping through 0..3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2322 
// qmin: full 4x8 tile, k = 8, with qmin = 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2336 
// qmax: full 4x8 tile, k = 8, with qmax = 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2350 
// strided_cm: full 4x8 tile, k = 8, with cm_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2364 #endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
2365 
2366 
2367 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// k_eq_16: single run on a full 1x8 tile (kr = 8) with k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2380 
// strided_cn: full 1x8 tile (kr = 8), k = 16, with cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2394 
// k_eq_16_subtile: k = 16 with every n in [1, 8] and m in [1, 1]; one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2412 
// k_eq_16_subtile_m: k = 16, n = 8, m in [1, 1]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2428 
// Sweeps n over [1, 8] with m=1 and k=16; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(n_val).k(16);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2444 
// Sweeps k over [1, 15] with m=1 and n=8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_val);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2459 
// Sweeps k over [1, 15], n over [1, 8] and m over [1, 1]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
2479 
// Sweeps k over [17, 31] with m=1 and n=8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_val);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2494 
// Sweeps k over [17, 31], n over [1, 8] and m over [1, 1]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
2514 
// Sweeps k over 32, 48, ..., 160 (step 16) with m=1 and n=8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_val);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2529 
// Sweeps k over 32..160 (step 16), n over [1, 8] and m over [1, 1]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
2549 
// Sweeps n over [9, 15] and k over 1..80 (step 17) with m=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2566 
// Sweeps n over [9, 15] and k over 1..80 (step 17) with m=1 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(11);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2584 
// Sweeps n over [9, 15], k over 1..80 (step 17) and m over [1, 1]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
2604 
// Sweeps n over 16 and 24 (step 8) and k over 1..80 (step 17) with m=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2621 
// Sweeps n over 16 and 24 (step 8) and k over 1..80 (step 17) with m=1 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(11);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2639 
// Sweeps n over 16 and 24, k over 1..80 (step 17) and m over [1, 1]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
2659 
// Sweeps k over 1..80 (step 17) with m=1, n=8 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_val);
    tester.ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2675 
// Sweeps k over 1..80 (step 17), n over [1, 8] and m over [1, 1] with ks=3; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
2696 
// Sweeps n over [9, 15] and k over 1..80 (step 17) with m=1 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2714 
// Sweeps n over 16 and 24 (step 8) and k over 1..80 (step 17) with m=1 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2732 
// Sweeps k over 1..80 (step 17), n over [1, 8] and m over [1, 1] with cm_stride=11; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(11).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
2753 
// Sweeps k over 1..80 (step 17) with m=1, n=8, ks=3 and a_offset set to 83.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_val);
    tester.ks(3).a_offset(83);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2770 
// Sweeps k over 1..80 (step 17) and zero_index over [0, 0] with m=1, n=8, ks=3, a_offset=83.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(8).k(k_val);
      tester.ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2790 
// Single run with m=1, n=8, k=16 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(16);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
2804 
// Single run with m=1, n=8, k=16 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(16);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
2818 
// Single run with m=1, n=8, k=16 and cm_stride overridden to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
2832 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2833 
2834 
2835 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m=4, n=16 and k fixed at 8; no stride or clamp overrides.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(1).sr(1);
  tester.m(4).n(16).k(8);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
2848 
// Single run with m=4, n=16, k=8 and cn_stride overridden to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(1).sr(1);
  tester.m(4).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
2862 
// Sweeps n over [1, 16] and m over [1, 4] with k fixed at 8; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      // A fresh tester is built for every (n, m) pair.
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(m_val).n(n_val).k(8);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
2880 
// Sweeps m over [1, 4] with n=16 and k=8; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(m_val).n(16).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2896 
// Sweeps n over [1, 16] with m=4 and k=8; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(n_val).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2912 
// Sweeps k over [1, 7] with m=4 and n=16.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_val);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2927 
// Sweeps k over [1, 7], n over [1, 16] and m over [1, 4]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
2947 
// Sweeps k over [9, 15] with m=4 and n=16.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_val);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2962 
// Sweeps k over [9, 15], n over [1, 16] and m over [1, 4]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
2982 
// Sweeps k over 16, 24, ..., 80 (step 8) with m=4 and n=16.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_val);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
2997 
// Sweeps k over 16..80 (step 8), n over [1, 16] and m over [1, 4]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
3017 
// Sweeps n over [17, 31] and k over 1..40 (step 9) with m=4.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
3034 
// Sweeps n over [17, 31] and k over 1..40 (step 9) with m=4 and cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val);
      tester.cn_stride(19);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
3052 
// Sweeps n over [17, 31], k over 1..40 (step 9) and m over [1, 4]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
3072 
// Sweeps n over 32 and 48 (step 16) and k over 1..40 (step 9) with m=4.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
3089 
// Sweeps n over 32 and 48 (step 16) and k over 1..40 (step 9) with m=4 and cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val);
      tester.cn_stride(19);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
3107 
// Sweeps n over 32 and 48, k over 1..40 (step 9) and m over [1, 4]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
3127 
// Sweeps k over 1..40 (step 9) with m=4, n=16 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_val);
    tester.ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
3143 
// Sweeps k over 1..40 (step 9), n over [1, 16] and m over [1, 4] with ks=3; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
3164 
// Sweeps n over [17, 31] and k over 1..40 (step 9) with m=4 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
3182 
// Sweeps n over 32 and 48 (step 16) and k over 1..40 (step 9) with m=4 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
3200 
// Sweeps k over 1..40 (step 9), n over [1, 16] and m over [1, 4] with cm_stride=19; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(19).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
3221 
// Full 4x16 tile with ks = 3 and a_offset = 163, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3238 
// Full 4x16 tile with ks = 3 and a_offset = 163; zero_index is swept over [0, 3] for each k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3258 
// Full 4x16 tile, k = 8, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3272 
// Full 4x16 tile, k = 8, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3286 
// Full 4x16 tile, k = 8, with cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3300 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3301 
3302 
3303 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Full 4x16 tile (m = mr, n = nr) with k fixed at 16.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3316 
// Full 4x16 tile, k = 16, with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .cn_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3330 
// k = 16 for every m in [1, 4] and n in [1, 16], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3348 
// k = 16 and n = 16 for every m in [1, 4], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(m)
      .n(16)
      .k(16)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3364 
// k = 16 and m = 4 for every n in [1, 16], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3380 
// Full 4x16 tile for every k in [1, 15].
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3395 
// Every k in [1, 15] crossed with every m in [1, 4] and n in [1, 16], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3415 
// Full 4x16 tile for every k in [17, 31].
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3430 
// Every k in [17, 31] crossed with every m in [1, 4] and n in [1, 16], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3450 
// Full 4x16 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3465 
// k = 32, 48, ..., 160 crossed with every m in [1, 4] and n in [1, 16], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3485 
// m = 4 with every n in [17, 31] (above nr = 16) and k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3502 
// m = 4 with every n in [17, 31], k = 1, 18, 35, 52, 69, and cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3520 
// Every n in [17, 31] crossed with k = 1, 18, 35, 52, 69 and every m in [1, 4], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3540 
// m = 4 with n = 32 and 48 (multiples of nr = 16) and k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3557 
// m = 4 with n = 32 and 48, k = 1, 18, 35, 52, 69, and cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3575 
// n = 32 and 48 crossed with k = 1, 18, 35, 52, 69 and every m in [1, 4], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3595 
// Full 4x16 tile with ks = 3, for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3611 
// ks = 3 with k = 1, 18, 35, 52, 69 crossed with every m in [1, 4] and n in [1, 16], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3632 
// m = 4 with every n in [17, 31], k = 1, 18, 35, 52, 69, and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3650 
// m = 4 with n = 32 and 48, k = 1, 18, 35, 52, 69, and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3668 
// cm_stride = 19 for every m in [1, 4] and n in [1, 16], with k = 1, 18, 35, 52, 69 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3689 
// Full 4x16 tile with ks = 3 and a_offset = 331, for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(331)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3706 
// Full 4x16 tile with ks = 3 and a_offset = 331; zero_index is swept over [0, 3] for each k in 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(331)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3726 
// Full 4x16 tile, k = 16, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3740 
// Full 4x16 tile, k = 16, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3754 
// Full 4x16 tile, k = 16, with cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .cm_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3768 #endif  // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3769 
3770 
3771 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Full 4x16 tile (m = mr, n = nr) with k fixed at 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3784 
// Full 4x16 tile, k = 8, with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3798 
// k = 8 for every m in [1, 4] and n in [1, 16], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3816 
// k = 8 and n = 16 for every m in [1, 4], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3832 
// k = 8 and m = 4 for every n in [1, 16], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3848 
// Full 4x16 tile for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3863 
// Every k in [1, 7] crossed with every m in [1, 4] and n in [1, 16], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3883 
// Full 4x16 tile for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3898 
// Every k in [9, 15] crossed with every m in [1, 4] and n in [1, 16], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3918 
// Full 4x16 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3933 
// k = 16, 24, ..., 80 crossed with every m in [1, 4] and n in [1, 16], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3953 
// m = 4 with every n in [17, 31] (above nr = 16) and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3970 
// m = 4 with every n in [17, 31], k = 1, 10, 19, 28, 37, and cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3988 
// Every n in [17, 31] crossed with k = 1, 10, 19, 28, 37 and every m in [1, 4], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4008 
// m = 4 with n = 32 and 48 (multiples of nr = 16) and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4025 
// m = 4 with n = 32 and 48, k = 1, 10, 19, 28, 37, and cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
4043 
// Runs the ld64 kernel for n in {32, 48}, k in {1, 10, 19, 28, 37} and every
// m in [1, 4], using a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4063 
// Runs the full 4x16 tile of the ld64 kernel with ks = 3 for
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_dim);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4079 
// Runs the ld64 kernel with ks = 3 for k in {1, 10, 19, 28, 37}, every n in
// [1, 16] and every m in [1, 4], using a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4100 
// Runs the ld64 kernel with ks = 3 and m = 4 for every n in [17, 31] and
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4118 
// Runs the ld64 kernel with ks = 3 and m = 4 for n in {32, 48} and
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4136 
// Runs the ld64 kernel with cm_stride = 19 for k in {1, 10, 19, 28, 37}, every
// n in [1, 16] and every m in [1, 4], using a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(19).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4157 
// Runs the full 4x16 tile of the ld64 kernel with ks = 3 and a_offset = 163
// for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_dim);
    tester.ks(3).a_offset(163);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4174 
// Runs the full 4x16 tile of the ld64 kernel with ks = 3 and a_offset = 163
// for k in {1, 10, 19, 28, 37}, setting zero_index to each value in [0, 3].
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(16).k(k_dim);
      tester.ks(3).a_offset(163).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4194 
// Runs the full 4x16 tile of the ld64 kernel at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4208 
// Runs the full 4x16 tile of the ld64 kernel at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4222 
// Runs the full 4x16 tile of the ld64 kernel at k = 8 with cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4236 #endif  // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4237 
4238 
4239 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Runs the full 4x16 tile of the ld128 kernel at k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(16);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4252 
// Runs the full 4x16 tile of the ld128 kernel at k = 16 with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(16);
  tester.cn_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4266 
// Runs the ld128 kernel at k = 16 for every n in [1, 16] and m in [1, 4],
// using a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(m_dim).n(n_dim).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4284 
// Runs the ld128 kernel at k = 16 and n = 16 for every m in [1, 4], using a
// single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(m_dim).n(16).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4300 
// Runs the ld128 kernel at k = 16 and m = 4 for every n in [1, 16], using a
// single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(n_dim).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4316 
// Runs the full 4x16 tile of the ld128 kernel for every k in [1, 15].
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_dim);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4331 
// Runs the ld128 kernel for every k in [1, 15], n in [1, 16] and m in [1, 4],
// using a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4351 
// Runs the full 4x16 tile of the ld128 kernel for every k in [17, 31].
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_dim);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4366 
// Runs the ld128 kernel for every k in [17, 31], n in [1, 16] and m in [1, 4],
// using a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4386 
// Runs the full 4x16 tile of the ld128 kernel for k = 32, 48, ..., 160.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_dim);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4401 
// Runs the ld128 kernel for k = 32, 48, ..., 160, every n in [1, 16] and every
// m in [1, 4], using a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4421 
// Runs the ld128 kernel with m = 4 for every n in [17, 31] and
// k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4438 
// Runs the ld128 kernel with m = 4 and cn_stride = 19 for every n in [17, 31]
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4456 
// Runs the ld128 kernel for every n in [17, 31], k in {1, 18, 35, 52, 69} and
// m in [1, 4], using a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4476 
// Runs the ld128 kernel with m = 4 for n in {32, 48} and
// k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4493 
// Runs the ld128 kernel with m = 4 and cn_stride = 19 for n in {32, 48} and
// k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4511 
// Runs the ld128 kernel for n in {32, 48}, k in {1, 18, 35, 52, 69} and every
// m in [1, 4], using a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4531 
// Runs the full 4x16 tile of the ld128 kernel with ks = 3 for
// k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_dim);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4547 
// Runs the ld128 kernel with ks = 3 for k in {1, 18, 35, 52, 69}, every n in
// [1, 16] and every m in [1, 4], using a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4568 
// Runs the ld128 kernel with ks = 3 and m = 4 for every n in [17, 31] and
// k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4586 
// Runs the ld128 kernel with ks = 3 and m = 4 for n in {32, 48} and
// k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(n_dim).k(k_dim);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4604 
// Runs the ld128 kernel with cm_stride = 19 for k in {1, 18, 35, 52, 69},
// every n in [1, 16] and every m in [1, 4], using a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.cm_stride(19).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4625 
// Runs the full 4x16 tile of the ld128 kernel with ks = 3 and a_offset = 331
// for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_dim);
    tester.ks(3).a_offset(331);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4642 
// Runs the full 4x16 tile of the ld128 kernel with ks = 3 and a_offset = 331
// for k in {1, 18, 35, 52, 69}, setting zero_index to each value in [0, 3].
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(16).k(k_dim);
      tester.ks(3).a_offset(331).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4662 
// Runs the full 4x16 tile of the ld128 kernel at k = 16 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4676 
// Runs the full 4x16 tile of the ld128 kernel at k = 16 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4690 
// Runs the full 4x16 tile of the ld128 kernel at k = 16 with cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_LD128, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(16);
  tester.cm_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_ld128,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4704 #endif  // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4705 
4706 
4707 #if XNN_ARCH_ARM
// Runs the 1x1c4 armsimd32 kernel on a single 1x1 tile at k = 4.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(1).kr(4).sr(1);
  tester.m(1).n(1).k(4);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
      xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
      xnn_qs8_requantize_fp32);
}
4720 
// Runs the 1x1c4 armsimd32 kernel on a 1x1 tile at k = 4 with cn_stride set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(1).kr(4).sr(1);
  tester.m(1).n(1).k(4);
  tester.cn_stride(3);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
      xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
      xnn_qs8_requantize_fp32);
}
4734 
// Runs the 1x1c4 armsimd32 kernel at k = 4 over the n and m loops; both ranges
// contain only the value 1, so exactly one shape is run, with a single
// iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 1; n_dim <= 1; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(1).kr(4).sr(1);
      tester.m(m_dim).n(n_dim).k(4);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
          xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4752 
// Runs the 1x1c4 armsimd32 kernel at k = 4 and n = 1 over the m loop, whose
// range contains only the value 1; a single iteration is used.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(1).kr(4).sr(1);
    tester.m(m_dim).n(1).k(4);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
4768 
// Runs the 1x1c4 armsimd32 kernel at k = 4 and m = 1 over the n loop, whose
// range contains only the value 1; a single iteration is used.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n_dim = 1; n_dim <= 1; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(1).kr(4).sr(1);
    tester.m(1).n(n_dim).k(4);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
4784 
// Runs the 1x1c4 armsimd32 kernel on a 1x1 tile for every k in [1, 3].
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(1).kr(4).sr(1);
    tester.m(1).n(1).k(k_dim);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
4799 
// Runs the 1x1c4 armsimd32 kernel for every k in [1, 3]; the n and m loops each
// cover only the value 1. A single iteration is used per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 1; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(1).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4819 
// Runs the 1x1c4 armsimd32 kernel on a 1x1 tile for every k in [5, 7].
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(1).kr(4).sr(1);
    tester.m(1).n(1).k(k_dim);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
4834 
// Runs the 1x1c4 armsimd32 kernel for every k in [5, 7]; the n and m loops each
// cover only the value 1. A single iteration is used per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 1; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(1).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4854 
// Runs the 1x1c4 armsimd32 kernel on a 1x1 tile for k = 8, 12, ..., 40.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(1).kr(4).sr(1);
    tester.m(1).n(1).k(k_dim);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
        xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
        xnn_qs8_requantize_fp32);
  }
}
4869 
// Runs the 1x1c4 armsimd32 kernel for k = 8, 12, ..., 40; the n and m loops
// each cover only the value 1. A single iteration is used per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim <= 1; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(1).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qc8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4889 
// Intended to run the 1x1c4 armsimd32 microkernel with n > 1 for
// k = 1, 6, 11 and 16.
// NOTE(review): the outer loop starts at n = 2 with the condition n < 2, so its
// body never executes and this test makes no kernel call. The n_div_1 tests
// cover n = 2 and n = 3. The bounds are kept as generated; any change belongs
// in the test generator.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n < 2; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
4906 
// Intended to run the 1x1c4 armsimd32 microkernel with n > 1 and cn_stride = 3
// for k = 1, 6, 11 and 16.
// NOTE(review): the outer loop starts at n = 2 with the condition n < 2, so its
// body never executes and this test makes no kernel call. The
// n_div_1_strided_cn test covers n = 2 and n = 3 with the same stride.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n < 2; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
4924 
// Intended to run the 1x1c4 armsimd32 microkernel with n > 1 over m = 1..1 for
// k = 1, 6, 11 and 16, one iteration per configuration.
// NOTE(review): the outer loop starts at n = 2 with the condition n < 2, so its
// body never executes and this test makes no kernel call. The n_div_1_subtile
// test covers n = 2 and n = 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n < 2; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4944 
// Runs the 1x1c4 armsimd32 microkernel with m = 1 for n = 2 and 3, each with
// k = 1, 6, 11 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 3; n += 1) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
4961 
// Runs the 1x1c4 armsimd32 microkernel with m = 1 and cn_stride = 3 for
// n = 2 and 3, each with k = 1, 6, 11 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 3; n += 1) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
4979 
// Runs the 1x1c4 armsimd32 microkernel for n = 2 and 3, k = 1, 6, 11 and 16,
// and m = 1..1, with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 3; n += 1) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
4999 
// Runs the 1x1c4 armsimd32 microkernel with m = 1, n = 1 and ks = 3 for
// k = 1, 6, 11 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(1)
      .kr(4)
      .sr(1)
      .m(1)
      .n(1)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
5015 
// With ks = 3 and k = 1, 6, 11 and 16, runs the 1x1c4 armsimd32 microkernel
// over every (m, n) with 1 <= m <= 1 and 1 <= n <= 1, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 1; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5036 
// Intended to run the 1x1c4 armsimd32 microkernel with n > 1 and ks = 3 for
// k = 1, 6, 11 and 16.
// NOTE(review): the outer loop starts at n = 2 with the condition n < 2, so its
// body never executes and this test makes no kernel call. The
// n_div_1_small_kernel test covers n = 2 and n = 3 with ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n < 2; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
5054 
// Runs the 1x1c4 armsimd32 microkernel with m = 1 and ks = 3 for n = 2 and 3,
// each with k = 1, 6, 11 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 3; n += 1) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
5072 
// With cm_stride = 3 and k = 1, 6, 11 and 16, runs the 1x1c4 armsimd32
// microkernel over every (m, n) with 1 <= m <= 1 and 1 <= n <= 1, one
// iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 1; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5093 
// Runs the 1x1c4 armsimd32 microkernel with m = 1, n = 1, ks = 3 and
// a_offset = 23 for k = 1, 6, 11 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(1)
      .kr(4)
      .sr(1)
      .m(1)
      .n(1)
      .k(k)
      .ks(3)
      .a_offset(23)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
5110 
// Runs the 1x1c4 armsimd32 microkernel with m = 1, n = 1, ks = 3 and
// a_offset = 23 for k = 1, 6, 11 and 16, setting zero_index to each mz in
// [0, 1) (only mz = 0 here).
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(1)
        .n(1)
        .k(k)
        .ks(3)
        .a_offset(23)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
5130 
// Runs the 1x1c4 armsimd32 microkernel with m = 1, n = 1, k = 4 and the
// tester's qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1)
    .nr(1)
    .kr(4)
    .sr(1)
    .m(1)
    .n(1)
    .k(4)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
5144 
// Runs the 1x1c4 armsimd32 microkernel with m = 1, n = 1, k = 4 and the
// tester's qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1)
    .nr(1)
    .kr(4)
    .sr(1)
    .m(1)
    .n(1)
    .k(4)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
5158 
// Runs the 1x1c4 armsimd32 microkernel with m = 1, n = 1, k = 4 and
// cm_stride = 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1)
    .nr(1)
    .kr(4)
    .sr(1)
    .m(1)
    .n(1)
    .k(4)
    .cm_stride(3)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
5172 #endif  // XNN_ARCH_ARM
5173 
5174 
5175 #if XNN_ARCH_ARM
// Runs the 1x2c4 armsimd32 microkernel with m = 1, n = 2 and k = 4.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(4)
    .sr(1)
    .m(1)
    .n(2)
    .k(4)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
5188 
// Runs the 1x2c4 armsimd32 microkernel with m = 1, n = 2, k = 4 and
// cn_stride = 5.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(4)
    .sr(1)
    .m(1)
    .n(2)
    .k(4)
    .cn_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
5202 
// With k = 4, runs the 1x2c4 armsimd32 microkernel over every (m, n) with
// 1 <= m <= 1 and 1 <= n <= 2, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
5220 
// With n = 2 and k = 4, runs the 1x2c4 armsimd32 microkernel for m = 1..1,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(m)
      .n(2)
      .k(4)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
5236 
// With m = 1 and k = 4, runs the 1x2c4 armsimd32 microkernel for n = 1 and 2,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(1)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
5252 
// Runs the 1x2c4 armsimd32 microkernel with m = 1, n = 2 for k = 1, 2 and 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
5267 
// For k = 1, 2 and 3, runs the 1x2c4 armsimd32 microkernel over every (m, n)
// with 1 <= m <= 1 and 1 <= n <= 2, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5287 
// Runs the 1x2c4 armsimd32 microkernel with m = 1, n = 2 for k = 5, 6 and 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
5302 
// For k = 5, 6 and 7, runs the 1x2c4 armsimd32 microkernel over every (m, n)
// with 1 <= m <= 1 and 1 <= n <= 2, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5322 
// Runs the 1x2c4 armsimd32 microkernel with m = 1, n = 2 for every multiple
// of 4 in k = 8..40.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
5337 
// For every multiple of 4 in k = 8..40, runs the 1x2c4 armsimd32 microkernel
// over every (m, n) with 1 <= m <= 1 and 1 <= n <= 2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5357 
// Runs the 1x2c4 armsimd32 microkernel with m = 1 for n in [3, 4) (only n = 3),
// with k = 1, 6, 11 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
5374 
// Runs the 1x2c4 armsimd32 microkernel with m = 1 and cn_stride = 5 for n in
// [3, 4) (only n = 3), with k = 1, 6, 11 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
5392 
// Runs the 1x2c4 armsimd32 microkernel for n in [3, 4) (only n = 3),
// k = 1, 6, 11 and 16, and m = 1..1, with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5412 
// Runs the 1x2c4 armsimd32 microkernel with m = 1 for n = 4 and 6, each with
// k = 1, 6, 11 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
5429 
// Runs the 1x2c4 armsimd32 microkernel with m = 1 and cn_stride = 5 for
// n = 4 and 6, each with k = 1, 6, 11 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
5447 
// Runs the 1x2c4 armsimd32 microkernel for n = 4 and 6, k = 1, 6, 11 and 16,
// and m = 1..1, with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5467 
// Runs the 1x2c4 armsimd32 microkernel with m = 1, n = 2 and ks = 3 for
// k = 1, 6, 11 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
5483 
// With ks = 3 and k = 1, 6, 11 and 16, runs the 1x2c4 armsimd32 microkernel
// over every (m, n) with 1 <= m <= 1 and 1 <= n <= 2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5504 
// Runs the 1x2c4 armsimd32 microkernel with m = 1 and ks = 3 for n in [3, 4)
// (only n = 3), with k = 1, 6, 11 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_gt_2_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
5522 
// Runs the 1x2c4 armsimd32 microkernel with m = 1 and ks = 3 for n = 4 and 6,
// each with k = 1, 6, 11 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, n_div_2_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
5540 
// With cm_stride = 5 and k = 1, 6, 11 and 16, runs the 1x2c4 armsimd32
// microkernel over every (m, n) with 1 <= m <= 1 and 1 <= n <= 2, one
// iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5561 
// Runs the 1x2c4 armsimd32 microkernel with m = 1, n = 2, ks = 3 and
// a_offset = 23 for k = 1, 6, 11 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(1)
      .n(2)
      .k(k)
      .ks(3)
      .a_offset(23)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
5578 
// Runs the 1x2c4 armsimd32 microkernel with m = 1, n = 2, ks = 3 and
// a_offset = 23 for k = 1, 6, 11 and 16, setting zero_index to each mz in
// [0, 1) (only mz = 0 here).
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(1)
        .n(2)
        .k(k)
        .ks(3)
        .a_offset(23)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
5598 
// Runs the 1x2c4 armsimd32 microkernel with m = 1, n = 2, k = 4 and the
// tester's qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(4)
    .sr(1)
    .m(1)
    .n(2)
    .k(4)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
5612 
// Runs the 1x2c4 armsimd32 microkernel with m = 1, n = 2, k = 4 and the
// tester's qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(4)
    .sr(1)
    .m(1)
    .n(2)
    .k(4)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
5626 
// Runs the 1x2c4 armsimd32 microkernel with m = 1, n = 2, k = 4 and
// cm_stride = 5.
TEST(QC8_IGEMM_MINMAX_FP32_1X2C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(4)
    .sr(1)
    .m(1)
    .n(2)
    .k(4)
    .cm_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
5640 #endif  // XNN_ARCH_ARM
5641 
5642 
5643 #if XNN_ARCH_ARM
// Runs the 2x1c4 armsimd32 microkernel with m = 2, n = 1 and k = 4.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2)
    .nr(1)
    .kr(4)
    .sr(1)
    .m(2)
    .n(1)
    .k(4)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
5656 
// Runs the 2x1c4 armsimd32 microkernel with m = 2, n = 1, k = 4 and
// cn_stride = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2)
    .nr(1)
    .kr(4)
    .sr(1)
    .m(2)
    .n(1)
    .k(4)
    .cn_stride(3)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
5670 
// With k = 4, runs the 2x1c4 armsimd32 microkernel over every (m, n) with
// 1 <= m <= 2 and 1 <= n <= 1, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 1; n <= 1; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
5688 
// With n = 1 and k = 4, runs the 2x1c4 armsimd32 microkernel for m = 1 and 2,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(1)
      .kr(4)
      .sr(1)
      .m(m)
      .n(1)
      .k(4)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
5704 
// With m = 2 and k = 4, runs the 2x1c4 armsimd32 microkernel for n = 1..1,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 1; n <= 1; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(1)
      .kr(4)
      .sr(1)
      .m(2)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
5720 
// Runs the 2x1c4 armsimd32 microkernel with m = 2, n = 1 for k = 1, 2 and 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(1)
      .kr(4)
      .sr(1)
      .m(2)
      .n(1)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
5735 
// For k = 1, 2 and 3, runs the 2x1c4 armsimd32 microkernel over every (m, n)
// with 1 <= m <= 2 and 1 <= n <= 1, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 1; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5755 
// Runs the 2x1c4 armsimd32 microkernel with m = 2, n = 1 for k = 5, 6 and 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(1)
      .kr(4)
      .sr(1)
      .m(2)
      .n(1)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
5770 
// 2x1c4 ARMSIMD32 kernel with k swept over 5..7, n over 1..1 and m over 1..2.
// Each (k, n, m) combination runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 1; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5790 
// 2x1c4 ARMSIMD32 kernel with k stepping through the multiples of 4 from 8 to
// 40 inclusive, m == mr (2) and n == nr (1).
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(1)
      .kr(4)
      .sr(1)
      .m(2)
      .n(1)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
5805 
// 2x1c4 ARMSIMD32 kernel with k stepping through the multiples of 4 from 8 to
// 40, n over 1..1 and m over 1..2. Each combination runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 1; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5825 
// 2x1c4 ARMSIMD32 kernel, intended to cover n strictly between nr and 2*nr
// with k in {1, 6, 11, 16} and m == 2.
// NOTE(review): the outer loop iterates n over [2, 2), which is empty, so the
// tester is never invoked and this test passes vacuously. Presumably a
// degenerate case of the generator for nr == 1 -- confirm before counting this
// test as coverage. Bounds are kept as generated.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n < 2; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
5842 
// 2x1c4 ARMSIMD32 kernel, intended to cover n strictly between nr and 2*nr
// with cn_stride == 3, k in {1, 6, 11, 16} and m == 2.
// NOTE(review): the outer loop iterates n over [2, 2), which is empty, so the
// tester is never invoked and this test passes vacuously. Presumably a
// degenerate case of the generator for nr == 1 -- confirm before counting this
// test as coverage. Bounds are kept as generated.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n < 2; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
5860 
// 2x1c4 ARMSIMD32 kernel, intended to cover n strictly between nr and 2*nr
// with k in {1, 6, 11, 16} and m over 1..2, one iteration per case.
// NOTE(review): the outer loop iterates n over [2, 2), which is empty, so the
// tester is never invoked and this test passes vacuously. Presumably a
// degenerate case of the generator for nr == 1 -- confirm before counting this
// test as coverage. Bounds are kept as generated.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n < 2; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5880 
// 2x1c4 ARMSIMD32 kernel with n over 2..3 (multiples of nr == 1 above nr),
// k in {1, 6, 11, 16} and m == 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 3; n += 1) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
5897 
// 2x1c4 ARMSIMD32 kernel with n over 2..3, k in {1, 6, 11, 16}, m == 2 and
// cn_stride set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 3; n += 1) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
5915 
// 2x1c4 ARMSIMD32 kernel with n over 2..3, k in {1, 6, 11, 16} and m over
// 1..2. Each combination runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 3; n += 1) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5935 
// 2x1c4 ARMSIMD32 kernel with ks set to 3, k in {1, 6, 11, 16},
// m == mr (2) and n == nr (1).
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(1)
      .kr(4)
      .sr(1)
      .m(2)
      .n(1)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
5951 
// 2x1c4 ARMSIMD32 kernel with ks set to 3, k in {1, 6, 11, 16}, n over 1..1
// and m over 1..2. Each combination runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 1; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
5972 
// 2x1c4 ARMSIMD32 kernel, intended to cover n strictly between nr and 2*nr
// with ks == 3, k in {1, 6, 11, 16} and m == 2.
// NOTE(review): the outer loop iterates n over [2, 2), which is empty, so the
// tester is never invoked and this test passes vacuously. Presumably a
// degenerate case of the generator for nr == 1 -- confirm before counting this
// test as coverage. Bounds are kept as generated.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n < 2; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
5990 
// 2x1c4 ARMSIMD32 kernel with ks set to 3, n over 2..3, k in {1, 6, 11, 16}
// and m == 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 2; n <= 3; n += 1) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
6008 
// 2x1c4 ARMSIMD32 kernel with cm_stride set to 3, k in {1, 6, 11, 16},
// n over 1..1 and m over 1..2. Each combination runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 1; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(1)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
6029 
// 2x1c4 ARMSIMD32 kernel with a_offset set to 43 and ks set to 3,
// k in {1, 6, 11, 16}, m == 2 and n == 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(1)
      .kr(4)
      .sr(1)
      .m(2)
      .n(1)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
6046 
// 2x1c4 ARMSIMD32 kernel with zero_index swept over 0..1 (one value per row of
// the mr == 2 tile), together with a_offset == 43, ks == 3 and
// k in {1, 6, 11, 16}.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(1)
        .kr(4)
        .sr(1)
        .m(2)
        .n(1)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
6066 
// 2x1c4 ARMSIMD32 kernel on a single 2x1 tile at k == 4 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2)
    .nr(1)
    .kr(4)
    .sr(1)
    .m(2)
    .n(1)
    .k(4)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
6080 
// 2x1c4 ARMSIMD32 kernel on a single 2x1 tile at k == 4 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2)
    .nr(1)
    .kr(4)
    .sr(1)
    .m(2)
    .n(1)
    .k(4)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
6094 
// 2x1c4 ARMSIMD32 kernel on a single 2x1 tile at k == 4 with cm_stride set
// to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2)
    .nr(1)
    .kr(4)
    .sr(1)
    .m(2)
    .n(1)
    .k(4)
    .cm_stride(3)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
6108 #endif  // XNN_ARCH_ARM
6109 
6110 
6111 #if XNN_ARCH_ARM
// 2x2c4 ARMSIMD32 kernel on a single 2x2 tile (m == mr, n == nr) at k == 4.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(4)
    .sr(1)
    .m(2)
    .n(2)
    .k(4)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
6124 
// 2x2c4 ARMSIMD32 kernel on a single 2x2 tile at k == 4 with cn_stride set
// to 5.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(4)
    .sr(1)
    .m(2)
    .n(2)
    .k(4)
    .cn_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
6138 
// 2x2c4 ARMSIMD32 kernel at k == 4 with n over 1..2 and m over 1..2.
// Each (n, m) combination runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
6156 
// 2x2c4 ARMSIMD32 kernel at k == 4 with n fixed at 2 while m runs over 1..2.
// Each case runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(m)
      .n(2)
      .k(4)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
6172 
// 2x2c4 ARMSIMD32 kernel at k == 4 with m fixed at 2 while n runs over 1..2.
// Each case runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(2)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
6188 
// 2x2c4 ARMSIMD32 kernel with k swept over 1..3 (below kr == 4),
// m == mr (2) and n == nr (2).
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
6203 
// 2x2c4 ARMSIMD32 kernel with k swept over 1..3, n over 1..2 and m over 1..2.
// Each (k, n, m) combination runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
6223 
// 2x2c4 ARMSIMD32 kernel with k swept over 5..7 (between kr and 2*kr),
// m == mr (2) and n == nr (2).
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
6238 
// 2x2c4 ARMSIMD32 kernel with k swept over 5..7, n over 1..2 and m over 1..2.
// Each (k, n, m) combination runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
6258 
// 2x2c4 ARMSIMD32 kernel with k stepping through the multiples of 4 from 8 to
// 40 inclusive, m == mr (2) and n == nr (2).
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
6273 
// 2x2c4 ARMSIMD32 kernel with k stepping through the multiples of 4 from 8 to
// 40, n over 1..2 and m over 1..2. Each combination runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
6293 
// 2x2c4 ARMSIMD32 kernel with n strictly between nr and 2*nr (only n == 3),
// k in {1, 6, 11, 16} and m == 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
6310 
// 2x2c4 ARMSIMD32 kernel with n == 3 (the only value in the loop range),
// k in {1, 6, 11, 16}, m == 2 and cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
6328 
// 2x2c4 ARMSIMD32 kernel with n == 3 (the only value in the loop range),
// k in {1, 6, 11, 16} and m over 1..2. Each combination runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
6348 
// 2x2c4 ARMSIMD32 kernel with n in {4, 6} (multiples of nr == 2 above nr),
// k in {1, 6, 11, 16} and m == 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
6365 
// 2x2c4 ARMSIMD32 kernel with n in {4, 6}, k in {1, 6, 11, 16}, m == 2 and
// cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
6383 
// 2x2c4 ARMSIMD32 kernel with n in {4, 6}, k in {1, 6, 11, 16} and m over
// 1..2. Each combination runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
6403 
// 2x2c4 ARMSIMD32 kernel with ks set to 3, k in {1, 6, 11, 16},
// m == mr (2) and n == nr (2).
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
6419 
// 2x2c4 ARMSIMD32 kernel with ks set to 3, k in {1, 6, 11, 16}, n over 1..2
// and m over 1..2. Each combination runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
6440 
// 2x2c4 ARMSIMD32 kernel with ks set to 3, n == 3 (the only value in the loop
// range), k in {1, 6, 11, 16} and m == 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_gt_2_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
6458 
// 2x2c4 ARMSIMD32 kernel with ks set to 3, n in {4, 6}, k in {1, 6, 11, 16}
// and m == 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, n_div_2_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
6476 
// 2x2c4 ARMSIMD32 kernel with cm_stride set to 5, k in {1, 6, 11, 16},
// n over 1..2 and m over 1..2. Each combination runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
6497 
// 2x2c4 ARMSIMD32 kernel with a_offset set to 43 and ks set to 3,
// k in {1, 6, 11, 16}, m == 2 and n == 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(4)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
  }
}
6514 
// 2x2c4 ARMSIMD32 kernel with zero_index swept over 0..1 (one value per row of
// the mr == 2 tile), together with a_offset == 43, ks == 3 and
// k in {1, 6, 11, 16}.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(2)
        .n(2)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
    }
  }
}
6534 
// 2x2c4 ARMSIMD32 kernel on a single 2x2 tile at k == 4 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(4)
    .sr(1)
    .m(2)
    .n(2)
    .k(4)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
6548 
// 2x2c4 ARMSIMD32 kernel on a single 2x2 tile at k == 4 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(4)
    .sr(1)
    .m(2)
    .n(2)
    .k(4)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
6562 
// 2x2c4 ARMSIMD32 kernel on a single 2x2 tile at k == 4 with cm_stride set
// to 5.
TEST(QC8_IGEMM_MINMAX_FP32_2X2C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(4)
    .sr(1)
    .m(2)
    .n(2)
    .k(4)
    .cm_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2c4__armsimd32, xnn_init_qc8_conv_minmax_fp32_armsimd32_params, xnn_qs8_requantize_fp32);
}
6576 #endif  // XNN_ARCH_ARM
6577 
6578 
6579 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 1x8 NEON MLAL-lane kernel on a single 1x8 tile (m == mr, n == nr) at k == 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
6592 
// 1x8 NEON MLAL-lane kernel on a single 1x8 tile at k == 8 with cn_stride set
// to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
6606 
// 1x8 NEON MLAL-lane kernel at k == 8 with n over 1..8 and m over 1..1.
// Each (n, m) combination runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
6624 
// 1x8 NEON MLAL-lane kernel at k == 8 with n fixed at 8 while m runs over 1..1
// (a single value, because the loop's upper bound equals mr == 1).
// Each case runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
6640 
// 1x8 NEON MLAL-lane kernel at k == 8 with m fixed at 1 while n runs over 1..8.
// Each case runs one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
6656 
// m=1, n=8; sweeps k over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
6671 
// Sweeps k over 1..7, n over 1..8 and m over 1..1, one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6691 
// m=1, n=8; sweeps k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
6706 
// Sweeps k over 9..15, n over 1..8 and m over 1..1, one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6726 
// m=1, n=8; sweeps k over 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
6741 
// Sweeps k over 16..80 in steps of 8, n over 1..8 and m over 1..1, one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6761 
// m=1; sweeps n over 9..15 and k over 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6778 
// m=1, cn_stride=11; sweeps n over 9..15 and k over 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6796 
// Sweeps n over 9..15, k over 1..40 in steps of 9 and m over 1..1, one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6816 
// m=1; sweeps n over 16 and 24 (step 8) and k over 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6833 
// m=1, cn_stride=11; sweeps n over 16 and 24 (step 8) and k over 1..40 in
// steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6851 
// Sweeps n over 16 and 24 (step 8), k over 1..40 in steps of 9 and m over
// 1..1, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6871 
// m=1, n=8, ks=3; sweeps k over 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
6887 
// ks=3; sweeps k over 1..40 in steps of 9, n over 1..8 and m over 1..1, one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6908 
// m=1, ks=3; sweeps n over 9..15 and k over 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6926 
// m=1, ks=3; sweeps n over 16 and 24 (step 8) and k over 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6944 
// cm_stride=11; sweeps k over 1..40 in steps of 9, n over 1..8 and m over
// 1..1, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6965 
// m=1, n=8, ks=3, a_offset=43; sweeps k over 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
6982 
// m=1, n=8, ks=3, a_offset=43; sweeps k over 1..40 in steps of 9 and
// zero_index over 0..0.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(8).k(k_size)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7002 
// Single run with m=1, n=8, k=8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7016 
// Single run with m=1, n=8, k=8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7030 
// Single run with m=1, n=8, k=8 and cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7044 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7045 
7046 
7047 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 1x8c2 neon_mlal_dup kernel with m=1, n=8, k=16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7060 
// Single run with m=1, n=8, k=16 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7074 
// k fixed at 16; sweeps n over 1..8 and m over 1..1, one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(16)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7092 
// n=8 and k=16 fixed; sweeps m over 1..1 (a single value for this kernel),
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1)
        .m(m_size).n(8).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7108 
// m=1 and k=16 fixed; sweeps n over 1..8, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_size).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7124 
// m=1, n=8; sweeps k over 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7139 
// Sweeps k over 1..15, n over 1..8 and m over 1..1, one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7159 
// m=1, n=8; sweeps k over 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7174 
// Sweeps k over 17..31, n over 1..8 and m over 1..1, one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7194 
// m=1, n=8; sweeps k over 32, 48, ..., 160 (step 16).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7209 
// Sweeps k over 32..160 in steps of 16, n over 1..8 and m over 1..1, one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7229 
// m=1; sweeps n over 9..15 and k over 1..80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7246 
// m=1, cn_stride=11; sweeps n over 9..15 and k over 1..80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7264 
// Sweeps n over 9..15, k over 1..80 in steps of 17 and m over 1..1, one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7284 
// m=1; sweeps n over 16 and 24 (step 8) and k over 1..80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7301 
// m=1, cn_stride=11; sweeps n over 16 and 24 (step 8) and k over 1..80 in
// steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7319 
// Sweeps n over 16 and 24 (step 8), k over 1..80 in steps of 17 and m over
// 1..1, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7339 
// m=1, n=8, ks=3; sweeps k over 1..80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7355 
// ks=3; sweeps k over 1..80 in steps of 17, n over 1..8 and m over 1..1, one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7376 
// m=1, ks=3; sweeps n over 9..15 and k over 1..80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7394 
// m=1, ks=3; sweeps n over 16 and 24 (step 8) and k over 1..80 in steps of
// 17.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7412 
// cm_stride=11; sweeps k over 1..80 in steps of 17, n over 1..8 and m over
// 1..1, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7433 
// m=1, n=8, ks=3, a_offset=83; sweeps k over 1..80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7450 
// m=1, n=8, ks=3, a_offset=83; sweeps k over 1..80 in steps of 17 and
// zero_index over 0..0.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(2).sr(1)
          .m(1).n(8).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7470 
// Single run with m=1, n=8, k=16 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7484 
// Single run with m=1, n=8, k=16 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7498 
// Single run with m=1, n=8, k=16 and cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7512 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7513 
7514 
7515 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 1x8c2 neon_mlal_ld1r kernel with m=1, n=8, k=16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7528 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, strided_cn) {
  // Single run: m = 1, n = 8, k = 16, with cn_stride set to 11.
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
7542 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
  // Sweeps n over 1..8 (the m loop only visits m = 1) at k = 16,
  // one iteration per configuration.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7560 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  // The m loop only visits m = 1; n = 8, k = 16, one iteration.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(m_dim).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7576 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  // Sweeps n over 1..8 with m = 1 and k = 16, one iteration each.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(n_dim).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7592 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_lt_16) {
  // Sweeps k over 1..15 with m = 1 and n = 8.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7607 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
  // Sweeps k over 1..15 and n over 1..8 (the m loop only visits m = 1),
  // one iteration per configuration.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7627 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_gt_16) {
  // Sweeps k over 17..31 with m = 1 and n = 8.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7642 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
  // Sweeps k over 17..31 and n over 1..8 (the m loop only visits m = 1),
  // one iteration per configuration.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7662 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_div_16) {
  // Sweeps k over 32..160 in steps of 16 with m = 1 and n = 8.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7677 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
  // Sweeps k over 32..160 in steps of 16 and n over 1..8 (the m loop only
  // visits m = 1), one iteration per configuration.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7697 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_gt_8) {
  // Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 with m = 1.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7714 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  // Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 with m = 1 and
  // cn_stride set to 11.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7732 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
  // Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 (the m loop only
  // visits m = 1), one iteration per configuration.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7752 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_div_8) {
  // Sweeps n over 16, 24 and k over 1, 18, 35, 52, 69 with m = 1.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7769 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  // Sweeps n over 16, 24 and k over 1, 18, 35, 52, 69 with m = 1 and
  // cn_stride set to 11.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7787 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
  // Sweeps n over 16, 24 and k over 1, 18, 35, 52, 69 (the m loop only
  // visits m = 1), one iteration per configuration.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7807 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, small_kernel) {
  // Sweeps k over 1, 18, 35, 52, 69 with m = 1, n = 8 and ks set to 3.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7823 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, small_kernel_subtile) {
  // Sweeps k over 1, 18, 35, 52, 69 and n over 1..8 (the m loop only
  // visits m = 1) with ks set to 3, one iteration per configuration.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7844 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
  // Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 with m = 1 and
  // ks set to 3.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7862 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_div_8_small_kernel) {
  // Sweeps n over 16, 24 and k over 1, 18, 35, 52, 69 with m = 1 and
  // ks set to 3.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7880 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
  // Sweeps k over 1, 18, 35, 52, 69 and n over 1..8 (the m loop only
  // visits m = 1) with cm_stride set to 11, one iteration per configuration.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7901 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, a_offset) {
  // Sweeps k over 1, 18, 35, 52, 69 with m = 1, n = 8, ks set to 3 and
  // a_offset set to 83.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7918 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, zero) {
  // Sweeps k over 1, 18, 35, 52, 69 with m = 1, n = 8, ks set to 3 and
  // a_offset set to 83; the zero_index loop only visits index 0.
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_dim)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7938 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, qmin) {
  // Single run: m = 1, n = 8, k = 16, with qmin set to 128.
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
7952 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, qmax) {
  // Single run: m = 1, n = 8, k = 16, with qmax set to 128.
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
7966 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, strided_cm) {
  // Single run: m = 1, n = 8, k = 16, with cm_stride set to 11.
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
7980 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7981 
7982 
7983 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_eq_16) {
  // Single run: m = 1, n = 8, k = 16.
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7996 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, strided_cn) {
  // Single run: m = 1, n = 8, k = 16, with cn_stride set to 11.
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8010 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
  // Sweeps n over 1..8 (the m loop only visits m = 1) at k = 16,
  // one iteration per configuration.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8028 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
  // The m loop only visits m = 1; n = 8, k = 16, one iteration.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(m_dim).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8044 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
  // Sweeps n over 1..8 with m = 1 and k = 16, one iteration each.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(n_dim).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8060 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_lt_16) {
  // Sweeps k over 1..15 with m = 1 and n = 8.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8075 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
  // Sweeps k over 1..15 and n over 1..8 (the m loop only visits m = 1),
  // one iteration per configuration.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8095 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_gt_16) {
  // Sweeps k over 17..31 with m = 1 and n = 8.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8110 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
  // Sweeps k over 17..31 and n over 1..8 (the m loop only visits m = 1),
  // one iteration per configuration.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8130 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_div_16) {
  // Sweeps k over 32..160 in steps of 16 with m = 1 and n = 8.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8145 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, k_div_16_subtile) {
  // Sweeps k over 32..160 in steps of 16 and n over 1..8 (the m loop only
  // visits m = 1), one iteration per configuration.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8165 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_gt_8) {
  // Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 with m = 1.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8182 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
  // Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 with m = 1 and
  // cn_stride set to 11.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8200 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
  // Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 (the m loop only
  // visits m = 1), one iteration per configuration.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8220 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_div_8) {
  // Sweeps n over 16, 24 and k over 1, 18, 35, 52, 69 with m = 1.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8237 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
  // Sweeps n over 16, 24 and k over 1, 18, 35, 52, 69 with m = 1 and
  // cn_stride set to 11.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8255 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_div_8_subtile) {
  // Sweeps n over 16, 24 and k over 1, 18, 35, 52, 69 (the m loop only
  // visits m = 1), one iteration per configuration.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8275 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, small_kernel) {
  // Sweeps k over 1, 18, 35, 52, 69 with m = 1, n = 8 and ks set to 3.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8291 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, small_kernel_subtile) {
  // Sweeps k over 1, 18, 35, 52, 69 and n over 1..8 (the m loop only
  // visits m = 1) with ks set to 3, one iteration per configuration.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8312 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_gt_8_small_kernel) {
  // Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 with m = 1 and
  // ks set to 3.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8330 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, n_div_8_small_kernel) {
  // Sweeps n over 16, 24 and k over 1, 18, 35, 52, 69 with m = 1 and
  // ks set to 3.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8348 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, strided_cm_subtile) {
  // Sweeps k over 1, 18, 35, 52, 69 and n over 1..8 (the m loop only
  // visits m = 1) with cm_stride set to 11, one iteration per configuration.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8369 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, a_offset) {
  // Sweeps k over 1, 18, 35, 52, 69 with m = 1, n = 8, ks set to 3 and
  // a_offset set to 83.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8386 
// Runs the ld1r kernel with ks(3), a_offset(83) and zero_index(0)
// for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(8).k(kc);
      tester.ks(3).a_offset(83).zero_index(zero_row);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8406 
// Runs the ld1r kernel on a 1x8 problem with k == 16 and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8420 
// Runs the ld1r kernel on a 1x8 problem with k == 16 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8434 
// Runs the ld1r kernel on a 1x8 problem with k == 16 and cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8448 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8449 
8450 
8451 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the ld4r kernel on a 1x8 problem with k == 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8464 
// Runs the ld4r kernel on a 1x8 problem with k == 16 and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8478 
// Runs the ld4r kernel with k == 16 for n in [1, 8] and m == 1,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(mc).n(nc).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8496 
// Runs the ld4r kernel with k == 16 and n == 8 for m == 1 (the only value
// the m loop visits), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(mc).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8512 
// Runs the ld4r kernel with k == 16 and m == 1 for n in [1, 8],
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(nc).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8528 
// Runs the ld4r kernel on a 1x8 problem for every k in [1, 15].
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8543 
// Runs the ld4r kernel for k in [1, 15], n in [1, 8] and m == 1,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8563 
// Runs the ld4r kernel on a 1x8 problem for every k in [17, 31].
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8578 
// Runs the ld4r kernel for k in [17, 31], n in [1, 8] and m == 1,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8598 
// Runs the ld4r kernel on a 1x8 problem for k = 32, 48, ..., 160.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8613 
// Runs the ld4r kernel for k = 32, 48, ..., 160, n in [1, 8] and m == 1,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8633 
// Runs the ld4r kernel with m == 1 for n in [9, 15] and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8650 
// Runs the ld4r kernel with cn_stride(11) for n in [9, 15]
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8668 
// Runs the ld4r kernel for n in [9, 15], k in {1, 18, 35, 52, 69} and m == 1,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8688 
// Runs the ld4r kernel with m == 1 for n in {16, 24} and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8705 
// Runs the ld4r kernel with cn_stride(11) for n in {16, 24}
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8723 
// Runs the ld4r kernel for n in {16, 24}, k in {1, 18, 35, 52, 69} and m == 1,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8743 
// Runs the ld4r kernel on a 1x8 problem with ks(3) for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8759 
// Runs the ld4r kernel with ks(3) for k in {1, 18, 35, 52, 69}, n in [1, 8]
// and m == 1, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8780 
// Runs the ld4r kernel with ks(3) for n in [9, 15] and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8798 
// Runs the ld4r kernel with ks(3) for n in {16, 24} and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8816 
// Runs the ld4r kernel with cm_stride(11) over k in {1, 18, 35, 52, 69},
// n in [1, 8] and m == 1, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(11).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8837 
// Runs the ld4r kernel with ks(3) and a_offset(83) for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8854 
// Runs the ld4r kernel with ks(3), a_offset(83) and zero_index(0)
// for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(8).k(kc);
      tester.ks(3).a_offset(83).zero_index(zero_row);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8874 
// Runs the ld4r kernel on a 1x8 problem with k == 16 and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8888 
// Runs the ld4r kernel on a 1x8 problem with k == 16 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8902 
// Runs the ld4r kernel on a 1x8 problem with k == 16 and cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8916 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8917 
8918 
8919 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the 1x8c2s4 NEON MLAL kernel on a 1x8 problem with k == 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(4);
  tester.m(1).n(8).k(16);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
8932 
// Runs the 1x8c2s4 NEON MLAL kernel on a 1x8 problem with k == 16
// and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(2).sr(4);
  tester.m(1).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
8946 
// Runs the 1x8c2s4 NEON MLAL kernel with k == 16 for n in [1, 8] and m == 1,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(4);
      tester.m(mc).n(nc).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8964 
// Runs the 1x8c2s4 NEON MLAL kernel with k == 16 and n == 8 for m == 1 (the
// only value the m loop visits), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(4);
    tester.m(mc).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8980 
// Runs the 1x8c2s4 NEON MLAL kernel with k == 16 and m == 1 for n in [1, 8],
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(4);
    tester.m(1).n(nc).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8996 
// Runs the 1x8c2s4 NEON MLAL kernel on a 1x8 problem for every k in [1, 15].
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(4);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9011 
// Runs the 1x8c2s4 NEON MLAL kernel for k in [1, 15], n in [1, 8] and m == 1,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9031 
// Runs the 1x8c2s4 NEON MLAL kernel on a 1x8 problem for every k in [17, 31].
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(4);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9046 
// Runs the 1x8c2s4 NEON MLAL kernel for k in [17, 31], n in [1, 8] and m == 1,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9066 
// Runs the 1x8c2s4 NEON MLAL kernel on a 1x8 problem for k = 32, 48, ..., 160.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(4);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9081 
// Runs the 1x8c2s4 NEON MLAL kernel for k = 32, 48, ..., 160, n in [1, 8]
// and m == 1, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9101 
// Runs the 1x8c2s4 NEON MLAL kernel with m == 1 for n in [9, 15]
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(4);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9118 
// Runs the 1x8c2s4 NEON MLAL kernel with cn_stride(11) for n in [9, 15]
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(4);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9136 
// Runs the 1x8c2s4 NEON MLAL kernel for n in [9, 15], k in {1, 18, 35, 52, 69}
// and m == 1, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9156 
// Runs the 1x8c2s4 NEON MLAL kernel with m == 1 for n in {16, 24}
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(4);
      tester.m(1).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9173 
// Runs the 1x8c2s4 NEON MLAL kernel with cn_stride(11) for n in {16, 24}
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(2).sr(4);
      tester.m(1).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9191 
// Runs the 1x8c2s4 NEON MLAL kernel for n in {16, 24}, k in {1, 18, 35, 52, 69}
// and m == 1, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9211 
// Runs the 1x8c2s4 NEON MLAL kernel on a 1x8 problem with ks(3)
// for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(2).sr(4);
    tester.m(1).n(8).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9227 
// Runs the 1x8c2s4 NEON MLAL kernel with ks(3) for k in {1, 18, 35, 52, 69},
// n in [1, 8] and m == 1, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9248 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL,n_gt_8_small_kernel)9249   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8_small_kernel) {
9250     TEST_REQUIRES_ARM_NEON;
9251     for (uint32_t n = 9; n < 16; n++) {
9252       for (size_t k = 1; k <= 80; k += 17) {
9253         GemmMicrokernelTester()
9254           .mr(1)
9255           .nr(8)
9256           .kr(2)
9257           .sr(4)
9258           .m(1)
9259           .n(n)
9260           .k(k)
9261           .ks(3)
9262           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9263       }
9264     }
9265   }
9266 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL,n_div_8_small_kernel)9267   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8_small_kernel) {
9268     TEST_REQUIRES_ARM_NEON;
9269     for (uint32_t n = 16; n <= 24; n += 8) {
9270       for (size_t k = 1; k <= 80; k += 17) {
9271         GemmMicrokernelTester()
9272           .mr(1)
9273           .nr(8)
9274           .kr(2)
9275           .sr(4)
9276           .m(1)
9277           .n(n)
9278           .k(k)
9279           .ks(3)
9280           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9281       }
9282     }
9283   }
9284 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL,strided_cm_subtile)9285   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, strided_cm_subtile) {
9286     TEST_REQUIRES_ARM_NEON;
9287     for (size_t k = 1; k <= 80; k += 17) {
9288       for (uint32_t n = 1; n <= 8; n++) {
9289         for (uint32_t m = 1; m <= 1; m++) {
9290           GemmMicrokernelTester()
9291             .mr(1)
9292             .nr(8)
9293             .kr(2)
9294             .sr(4)
9295             .m(m)
9296             .n(n)
9297             .k(k)
9298             .cm_stride(11)
9299             .iterations(1)
9300             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9301         }
9302       }
9303     }
9304   }
9305 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL,a_offset)9306   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, a_offset) {
9307     TEST_REQUIRES_ARM_NEON;
9308     for (size_t k = 1; k <= 80; k += 17) {
9309       GemmMicrokernelTester()
9310         .mr(1)
9311         .nr(8)
9312         .kr(2)
9313         .sr(4)
9314         .m(1)
9315         .n(8)
9316         .k(k)
9317         .ks(3)
9318         .a_offset(83)
9319         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9320     }
9321   }
9322 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL,zero)9323   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, zero) {
9324     TEST_REQUIRES_ARM_NEON;
9325     for (size_t k = 1; k <= 80; k += 17) {
9326       for (uint32_t mz = 0; mz < 1; mz++) {
9327         GemmMicrokernelTester()
9328           .mr(1)
9329           .nr(8)
9330           .kr(2)
9331           .sr(4)
9332           .m(1)
9333           .n(8)
9334           .k(k)
9335           .ks(3)
9336           .a_offset(83)
9337           .zero_index(mz)
9338           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9339       }
9340     }
9341   }
9342 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL,qmin)9343   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, qmin) {
9344     TEST_REQUIRES_ARM_NEON;
9345     GemmMicrokernelTester()
9346       .mr(1)
9347       .nr(8)
9348       .kr(2)
9349       .sr(4)
9350       .m(1)
9351       .n(8)
9352       .k(16)
9353       .qmin(128)
9354       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9355   }
9356 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL,qmax)9357   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, qmax) {
9358     TEST_REQUIRES_ARM_NEON;
9359     GemmMicrokernelTester()
9360       .mr(1)
9361       .nr(8)
9362       .kr(2)
9363       .sr(4)
9364       .m(1)
9365       .n(8)
9366       .k(16)
9367       .qmax(128)
9368       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9369   }
9370 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL,strided_cm)9371   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, strided_cm) {
9372     TEST_REQUIRES_ARM_NEON;
9373     GemmMicrokernelTester()
9374       .mr(1)
9375       .nr(8)
9376       .kr(2)
9377       .sr(4)
9378       .m(1)
9379       .n(8)
9380       .k(16)
9381       .cm_stride(11)
9382       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9383   }
9384 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
9385 
9386 
9387 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,k_eq_16)9388   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16) {
9389     TEST_REQUIRES_ARM_NEON_V8;
9390     GemmMicrokernelTester()
9391       .mr(1)
9392       .nr(8)
9393       .kr(2)
9394       .sr(4)
9395       .m(1)
9396       .n(8)
9397       .k(16)
9398       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9399   }
9400 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,strided_cn)9401   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, strided_cn) {
9402     TEST_REQUIRES_ARM_NEON_V8;
9403     GemmMicrokernelTester()
9404       .mr(1)
9405       .nr(8)
9406       .kr(2)
9407       .sr(4)
9408       .m(1)
9409       .n(8)
9410       .k(16)
9411       .cn_stride(11)
9412       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9413   }
9414 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,k_eq_16_subtile)9415   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16_subtile) {
9416     TEST_REQUIRES_ARM_NEON_V8;
9417     for (uint32_t n = 1; n <= 8; n++) {
9418       for (uint32_t m = 1; m <= 1; m++) {
9419         GemmMicrokernelTester()
9420           .mr(1)
9421           .nr(8)
9422           .kr(2)
9423           .sr(4)
9424           .m(m)
9425           .n(n)
9426           .k(16)
9427           .iterations(1)
9428           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9429       }
9430     }
9431   }
9432 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,k_eq_16_subtile_m)9433   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16_subtile_m) {
9434     TEST_REQUIRES_ARM_NEON_V8;
9435     for (uint32_t m = 1; m <= 1; m++) {
9436       GemmMicrokernelTester()
9437         .mr(1)
9438         .nr(8)
9439         .kr(2)
9440         .sr(4)
9441         .m(m)
9442         .n(8)
9443         .k(16)
9444         .iterations(1)
9445         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9446     }
9447   }
9448 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,k_eq_16_subtile_n)9449   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16_subtile_n) {
9450     TEST_REQUIRES_ARM_NEON_V8;
9451     for (uint32_t n = 1; n <= 8; n++) {
9452       GemmMicrokernelTester()
9453         .mr(1)
9454         .nr(8)
9455         .kr(2)
9456         .sr(4)
9457         .m(1)
9458         .n(n)
9459         .k(16)
9460         .iterations(1)
9461         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9462     }
9463   }
9464 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,k_lt_16)9465   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_lt_16) {
9466     TEST_REQUIRES_ARM_NEON_V8;
9467     for (size_t k = 1; k < 16; k++) {
9468       GemmMicrokernelTester()
9469         .mr(1)
9470         .nr(8)
9471         .kr(2)
9472         .sr(4)
9473         .m(1)
9474         .n(8)
9475         .k(k)
9476         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9477     }
9478   }
9479 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,k_lt_16_subtile)9480   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_lt_16_subtile) {
9481     TEST_REQUIRES_ARM_NEON_V8;
9482     for (size_t k = 1; k < 16; k++) {
9483       for (uint32_t n = 1; n <= 8; n++) {
9484         for (uint32_t m = 1; m <= 1; m++) {
9485           GemmMicrokernelTester()
9486             .mr(1)
9487             .nr(8)
9488             .kr(2)
9489             .sr(4)
9490             .m(m)
9491             .n(n)
9492             .k(k)
9493             .iterations(1)
9494             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9495         }
9496       }
9497     }
9498   }
9499 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,k_gt_16)9500   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_gt_16) {
9501     TEST_REQUIRES_ARM_NEON_V8;
9502     for (size_t k = 17; k < 32; k++) {
9503       GemmMicrokernelTester()
9504         .mr(1)
9505         .nr(8)
9506         .kr(2)
9507         .sr(4)
9508         .m(1)
9509         .n(8)
9510         .k(k)
9511         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9512     }
9513   }
9514 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,k_gt_16_subtile)9515   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_gt_16_subtile) {
9516     TEST_REQUIRES_ARM_NEON_V8;
9517     for (size_t k = 17; k < 32; k++) {
9518       for (uint32_t n = 1; n <= 8; n++) {
9519         for (uint32_t m = 1; m <= 1; m++) {
9520           GemmMicrokernelTester()
9521             .mr(1)
9522             .nr(8)
9523             .kr(2)
9524             .sr(4)
9525             .m(m)
9526             .n(n)
9527             .k(k)
9528             .iterations(1)
9529             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9530         }
9531       }
9532     }
9533   }
9534 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,k_div_16)9535   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_div_16) {
9536     TEST_REQUIRES_ARM_NEON_V8;
9537     for (size_t k = 32; k <= 160; k += 16) {
9538       GemmMicrokernelTester()
9539         .mr(1)
9540         .nr(8)
9541         .kr(2)
9542         .sr(4)
9543         .m(1)
9544         .n(8)
9545         .k(k)
9546         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9547     }
9548   }
9549 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,k_div_16_subtile)9550   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_div_16_subtile) {
9551     TEST_REQUIRES_ARM_NEON_V8;
9552     for (size_t k = 32; k <= 160; k += 16) {
9553       for (uint32_t n = 1; n <= 8; n++) {
9554         for (uint32_t m = 1; m <= 1; m++) {
9555           GemmMicrokernelTester()
9556             .mr(1)
9557             .nr(8)
9558             .kr(2)
9559             .sr(4)
9560             .m(m)
9561             .n(n)
9562             .k(k)
9563             .iterations(1)
9564             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9565         }
9566       }
9567     }
9568   }
9569 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,n_gt_8)9570   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8) {
9571     TEST_REQUIRES_ARM_NEON_V8;
9572     for (uint32_t n = 9; n < 16; n++) {
9573       for (size_t k = 1; k <= 80; k += 17) {
9574         GemmMicrokernelTester()
9575           .mr(1)
9576           .nr(8)
9577           .kr(2)
9578           .sr(4)
9579           .m(1)
9580           .n(n)
9581           .k(k)
9582           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9583       }
9584     }
9585   }
9586 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,n_gt_8_strided_cn)9587   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8_strided_cn) {
9588     TEST_REQUIRES_ARM_NEON_V8;
9589     for (uint32_t n = 9; n < 16; n++) {
9590       for (size_t k = 1; k <= 80; k += 17) {
9591         GemmMicrokernelTester()
9592           .mr(1)
9593           .nr(8)
9594           .kr(2)
9595           .sr(4)
9596           .m(1)
9597           .n(n)
9598           .k(k)
9599           .cn_stride(11)
9600           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9601       }
9602     }
9603   }
9604 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,n_gt_8_subtile)9605   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8_subtile) {
9606     TEST_REQUIRES_ARM_NEON_V8;
9607     for (uint32_t n = 9; n < 16; n++) {
9608       for (size_t k = 1; k <= 80; k += 17) {
9609         for (uint32_t m = 1; m <= 1; m++) {
9610           GemmMicrokernelTester()
9611             .mr(1)
9612             .nr(8)
9613             .kr(2)
9614             .sr(4)
9615             .m(m)
9616             .n(n)
9617             .k(k)
9618             .iterations(1)
9619             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9620         }
9621       }
9622     }
9623   }
9624 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,n_div_8)9625   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8) {
9626     TEST_REQUIRES_ARM_NEON_V8;
9627     for (uint32_t n = 16; n <= 24; n += 8) {
9628       for (size_t k = 1; k <= 80; k += 17) {
9629         GemmMicrokernelTester()
9630           .mr(1)
9631           .nr(8)
9632           .kr(2)
9633           .sr(4)
9634           .m(1)
9635           .n(n)
9636           .k(k)
9637           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9638       }
9639     }
9640   }
9641 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,n_div_8_strided_cn)9642   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8_strided_cn) {
9643     TEST_REQUIRES_ARM_NEON_V8;
9644     for (uint32_t n = 16; n <= 24; n += 8) {
9645       for (size_t k = 1; k <= 80; k += 17) {
9646         GemmMicrokernelTester()
9647           .mr(1)
9648           .nr(8)
9649           .kr(2)
9650           .sr(4)
9651           .m(1)
9652           .n(n)
9653           .k(k)
9654           .cn_stride(11)
9655           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9656       }
9657     }
9658   }
9659 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,n_div_8_subtile)9660   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8_subtile) {
9661     TEST_REQUIRES_ARM_NEON_V8;
9662     for (uint32_t n = 16; n <= 24; n += 8) {
9663       for (size_t k = 1; k <= 80; k += 17) {
9664         for (uint32_t m = 1; m <= 1; m++) {
9665           GemmMicrokernelTester()
9666             .mr(1)
9667             .nr(8)
9668             .kr(2)
9669             .sr(4)
9670             .m(m)
9671             .n(n)
9672             .k(k)
9673             .iterations(1)
9674             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9675         }
9676       }
9677     }
9678   }
9679 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,small_kernel)9680   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, small_kernel) {
9681     TEST_REQUIRES_ARM_NEON_V8;
9682     for (size_t k = 1; k <= 80; k += 17) {
9683       GemmMicrokernelTester()
9684         .mr(1)
9685         .nr(8)
9686         .kr(2)
9687         .sr(4)
9688         .m(1)
9689         .n(8)
9690         .k(k)
9691         .ks(3)
9692         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9693     }
9694   }
9695 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,small_kernel_subtile)9696   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, small_kernel_subtile) {
9697     TEST_REQUIRES_ARM_NEON_V8;
9698     for (size_t k = 1; k <= 80; k += 17) {
9699       for (uint32_t n = 1; n <= 8; n++) {
9700         for (uint32_t m = 1; m <= 1; m++) {
9701           GemmMicrokernelTester()
9702             .mr(1)
9703             .nr(8)
9704             .kr(2)
9705             .sr(4)
9706             .m(m)
9707             .n(n)
9708             .k(k)
9709             .ks(3)
9710             .iterations(1)
9711             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9712         }
9713       }
9714     }
9715   }
9716 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,n_gt_8_small_kernel)9717   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8_small_kernel) {
9718     TEST_REQUIRES_ARM_NEON_V8;
9719     for (uint32_t n = 9; n < 16; n++) {
9720       for (size_t k = 1; k <= 80; k += 17) {
9721         GemmMicrokernelTester()
9722           .mr(1)
9723           .nr(8)
9724           .kr(2)
9725           .sr(4)
9726           .m(1)
9727           .n(n)
9728           .k(k)
9729           .ks(3)
9730           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9731       }
9732     }
9733   }
9734 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,n_div_8_small_kernel)9735   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8_small_kernel) {
9736     TEST_REQUIRES_ARM_NEON_V8;
9737     for (uint32_t n = 16; n <= 24; n += 8) {
9738       for (size_t k = 1; k <= 80; k += 17) {
9739         GemmMicrokernelTester()
9740           .mr(1)
9741           .nr(8)
9742           .kr(2)
9743           .sr(4)
9744           .m(1)
9745           .n(n)
9746           .k(k)
9747           .ks(3)
9748           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9749       }
9750     }
9751   }
9752 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,strided_cm_subtile)9753   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, strided_cm_subtile) {
9754     TEST_REQUIRES_ARM_NEON_V8;
9755     for (size_t k = 1; k <= 80; k += 17) {
9756       for (uint32_t n = 1; n <= 8; n++) {
9757         for (uint32_t m = 1; m <= 1; m++) {
9758           GemmMicrokernelTester()
9759             .mr(1)
9760             .nr(8)
9761             .kr(2)
9762             .sr(4)
9763             .m(m)
9764             .n(n)
9765             .k(k)
9766             .cm_stride(11)
9767             .iterations(1)
9768             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9769         }
9770       }
9771     }
9772   }
9773 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,a_offset)9774   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, a_offset) {
9775     TEST_REQUIRES_ARM_NEON_V8;
9776     for (size_t k = 1; k <= 80; k += 17) {
9777       GemmMicrokernelTester()
9778         .mr(1)
9779         .nr(8)
9780         .kr(2)
9781         .sr(4)
9782         .m(1)
9783         .n(8)
9784         .k(k)
9785         .ks(3)
9786         .a_offset(83)
9787         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9788     }
9789   }
9790 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,zero)9791   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, zero) {
9792     TEST_REQUIRES_ARM_NEON_V8;
9793     for (size_t k = 1; k <= 80; k += 17) {
9794       for (uint32_t mz = 0; mz < 1; mz++) {
9795         GemmMicrokernelTester()
9796           .mr(1)
9797           .nr(8)
9798           .kr(2)
9799           .sr(4)
9800           .m(1)
9801           .n(8)
9802           .k(k)
9803           .ks(3)
9804           .a_offset(83)
9805           .zero_index(mz)
9806           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9807       }
9808     }
9809   }
9810 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,qmin)9811   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, qmin) {
9812     TEST_REQUIRES_ARM_NEON_V8;
9813     GemmMicrokernelTester()
9814       .mr(1)
9815       .nr(8)
9816       .kr(2)
9817       .sr(4)
9818       .m(1)
9819       .n(8)
9820       .k(16)
9821       .qmin(128)
9822       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9823   }
9824 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,qmax)9825   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, qmax) {
9826     TEST_REQUIRES_ARM_NEON_V8;
9827     GemmMicrokernelTester()
9828       .mr(1)
9829       .nr(8)
9830       .kr(2)
9831       .sr(4)
9832       .m(1)
9833       .n(8)
9834       .k(16)
9835       .qmax(128)
9836       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9837   }
9838 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL,strided_cm)9839   TEST(QC8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, strided_cm) {
9840     TEST_REQUIRES_ARM_NEON_V8;
9841     GemmMicrokernelTester()
9842       .mr(1)
9843       .nr(8)
9844       .kr(2)
9845       .sr(4)
9846       .m(1)
9847       .n(8)
9848       .k(16)
9849       .cm_stride(11)
9850       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
9851   }
9852 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
9853 
9854 
9855 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP,k_eq_16)9856   TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16) {
9857     TEST_REQUIRES_ARM_NEON;
9858     GemmMicrokernelTester()
9859       .mr(1)
9860       .nr(8)
9861       .kr(4)
9862       .sr(1)
9863       .m(1)
9864       .n(8)
9865       .k(16)
9866       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9867   }
9868 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP,strided_cn)9869   TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, strided_cn) {
9870     TEST_REQUIRES_ARM_NEON;
9871     GemmMicrokernelTester()
9872       .mr(1)
9873       .nr(8)
9874       .kr(4)
9875       .sr(1)
9876       .m(1)
9877       .n(8)
9878       .k(16)
9879       .cn_stride(11)
9880       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9881   }
9882 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP,k_eq_16_subtile)9883   TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {
9884     TEST_REQUIRES_ARM_NEON;
9885     for (uint32_t n = 1; n <= 8; n++) {
9886       for (uint32_t m = 1; m <= 1; m++) {
9887         GemmMicrokernelTester()
9888           .mr(1)
9889           .nr(8)
9890           .kr(4)
9891           .sr(1)
9892           .m(m)
9893           .n(n)
9894           .k(16)
9895           .iterations(1)
9896           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9897       }
9898     }
9899   }
9900 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP,k_eq_16_subtile_m)9901   TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
9902     TEST_REQUIRES_ARM_NEON;
9903     for (uint32_t m = 1; m <= 1; m++) {
9904       GemmMicrokernelTester()
9905         .mr(1)
9906         .nr(8)
9907         .kr(4)
9908         .sr(1)
9909         .m(m)
9910         .n(8)
9911         .k(16)
9912         .iterations(1)
9913         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9914     }
9915   }
9916 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP,k_eq_16_subtile_n)9917   TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
9918     TEST_REQUIRES_ARM_NEON;
9919     for (uint32_t n = 1; n <= 8; n++) {
9920       GemmMicrokernelTester()
9921         .mr(1)
9922         .nr(8)
9923         .kr(4)
9924         .sr(1)
9925         .m(1)
9926         .n(n)
9927         .k(16)
9928         .iterations(1)
9929         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9930     }
9931   }
9932 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP,k_lt_16)9933   TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_lt_16) {
9934     TEST_REQUIRES_ARM_NEON;
9935     for (size_t k = 1; k < 16; k++) {
9936       GemmMicrokernelTester()
9937         .mr(1)
9938         .nr(8)
9939         .kr(4)
9940         .sr(1)
9941         .m(1)
9942         .n(8)
9943         .k(k)
9944         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9945     }
9946   }
9947 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP,k_lt_16_subtile)9948   TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
9949     TEST_REQUIRES_ARM_NEON;
9950     for (size_t k = 1; k < 16; k++) {
9951       for (uint32_t n = 1; n <= 8; n++) {
9952         for (uint32_t m = 1; m <= 1; m++) {
9953           GemmMicrokernelTester()
9954             .mr(1)
9955             .nr(8)
9956             .kr(4)
9957             .sr(1)
9958             .m(m)
9959             .n(n)
9960             .k(k)
9961             .iterations(1)
9962             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9963         }
9964       }
9965     }
9966   }
9967 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP,k_gt_16)9968   TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_gt_16) {
9969     TEST_REQUIRES_ARM_NEON;
9970     for (size_t k = 17; k < 32; k++) {
9971       GemmMicrokernelTester()
9972         .mr(1)
9973         .nr(8)
9974         .kr(4)
9975         .sr(1)
9976         .m(1)
9977         .n(8)
9978         .k(k)
9979         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9980     }
9981   }
9982 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP,k_gt_16_subtile)9983   TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
9984     TEST_REQUIRES_ARM_NEON;
9985     for (size_t k = 17; k < 32; k++) {
9986       for (uint32_t n = 1; n <= 8; n++) {
9987         for (uint32_t m = 1; m <= 1; m++) {
9988           GemmMicrokernelTester()
9989             .mr(1)
9990             .nr(8)
9991             .kr(4)
9992             .sr(1)
9993             .m(m)
9994             .n(n)
9995             .k(k)
9996             .iterations(1)
9997             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
9998         }
9999       }
10000     }
10001   }
10002 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP,k_div_16)10003   TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_div_16) {
10004     TEST_REQUIRES_ARM_NEON;
10005     for (size_t k = 32; k <= 160; k += 16) {
10006       GemmMicrokernelTester()
10007         .mr(1)
10008         .nr(8)
10009         .kr(4)
10010         .sr(1)
10011         .m(1)
10012         .n(8)
10013         .k(k)
10014         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
10015     }
10016   }
10017 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP,k_div_16_subtile)10018   TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
10019     TEST_REQUIRES_ARM_NEON;
10020     for (size_t k = 32; k <= 160; k += 16) {
10021       for (uint32_t n = 1; n <= 8; n++) {
10022         for (uint32_t m = 1; m <= 1; m++) {
10023           GemmMicrokernelTester()
10024             .mr(1)
10025             .nr(8)
10026             .kr(4)
10027             .sr(1)
10028             .m(m)
10029             .n(n)
10030             .k(k)
10031             .iterations(1)
10032             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
10033         }
10034       }
10035     }
10036   }
10037 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP,n_gt_8)10038   TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8) {
10039     TEST_REQUIRES_ARM_NEON;
10040     for (uint32_t n = 9; n < 16; n++) {
10041       for (size_t k = 1; k <= 80; k += 17) {
10042         GemmMicrokernelTester()
10043           .mr(1)
10044           .nr(8)
10045           .kr(4)
10046           .sr(1)
10047           .m(1)
10048           .n(n)
10049           .k(k)
10050           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
10051       }
10052     }
10053   }
10054 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP,n_gt_8_strided_cn)10055   TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
10056     TEST_REQUIRES_ARM_NEON;
10057     for (uint32_t n = 9; n < 16; n++) {
10058       for (size_t k = 1; k <= 80; k += 17) {
10059         GemmMicrokernelTester()
10060           .mr(1)
10061           .nr(8)
10062           .kr(4)
10063           .sr(1)
10064           .m(1)
10065           .n(n)
10066           .k(k)
10067           .cn_stride(11)
10068           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
10069       }
10070     }
10071   }
10072 
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP,n_gt_8_subtile)10073   TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
10074     TEST_REQUIRES_ARM_NEON;
10075     for (uint32_t n = 9; n < 16; n++) {
10076       for (size_t k = 1; k <= 80; k += 17) {
10077         for (uint32_t m = 1; m <= 1; m++) {
10078           GemmMicrokernelTester()
10079             .mr(1)
10080             .nr(8)
10081             .kr(4)
10082             .sr(1)
10083             .m(m)
10084             .n(n)
10085             .k(k)
10086             .iterations(1)
10087             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
10088         }
10089       }
10090     }
10091   }
10092 
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} on a 1x8 tile (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1);
      tester.nr(8);
      tester.kr(4);
      tester.sr(1);
      tester.m(1);
      tester.n(cols);
      tester.k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10109 
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} on a 1x8 tile (kr=4, sr=1)
// with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1);
      tester.nr(8);
      tester.kr(4);
      tester.sr(1);
      tester.m(1);
      tester.n(cols);
      tester.k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10127 
// Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over 1..1 on a 1x8 tile
// (kr=4, sr=1), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1);
        tester.nr(8);
        tester.kr(4);
        tester.sr(1);
        tester.m(rows);
        tester.n(cols);
        tester.k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10147 
// Sweeps k over {1, 18, 35, 52, 69} at m=1, n=8 with ks set to 3 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester tester;
    tester.mr(1);
    tester.nr(8);
    tester.kr(4);
    tester.sr(1);
    tester.m(1);
    tester.n(8);
    tester.k(depth);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10163 
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over 1..1 with ks set to 3
// (kr=4, sr=1), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1);
        tester.nr(8);
        tester.kr(4);
        tester.sr(1);
        tester.m(rows);
        tester.n(cols);
        tester.k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10184 
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} at m=1 with ks set to 3
// (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1);
      tester.nr(8);
      tester.kr(4);
      tester.sr(1);
      tester.m(1);
      tester.n(cols);
      tester.k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10202 
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} at m=1 with ks set to 3
// (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1);
      tester.nr(8);
      tester.kr(4);
      tester.sr(1);
      tester.m(1);
      tester.n(cols);
      tester.k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10220 
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over 1..1 with cm_stride
// set to 11 (kr=4, sr=1), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1);
        tester.nr(8);
        tester.kr(4);
        tester.sr(1);
        tester.m(rows);
        tester.n(cols);
        tester.k(depth);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10241 
// Sweeps k over {1, 18, 35, 52, 69} at m=1, n=8 with ks set to 3 and a_offset
// set to 83 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester tester;
    tester.mr(1);
    tester.nr(8);
    tester.kr(4);
    tester.sr(1);
    tester.m(1);
    tester.n(8);
    tester.k(depth);
    tester.ks(3);
    tester.a_offset(83);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10258 
// Sweeps k over {1, 18, 35, 52, 69} and zero_index over 0..0 at m=1, n=8 with
// ks set to 3 and a_offset set to 83 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(1);
      tester.nr(8);
      tester.kr(4);
      tester.sr(1);
      tester.m(1);
      tester.n(8);
      tester.k(depth);
      tester.ks(3);
      tester.a_offset(83);
      tester.zero_index(zero_row);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10278 
// Single configuration: m=1, n=8, k=16 with qmin set to 128 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1);
  tester.nr(8);
  tester.kr(4);
  tester.sr(1);
  tester.m(1);
  tester.n(8);
  tester.k(16);
  tester.qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10292 
// Single configuration: m=1, n=8, k=16 with qmax set to 128 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1);
  tester.nr(8);
  tester.kr(4);
  tester.sr(1);
  tester.m(1);
  tester.n(8);
  tester.k(16);
  tester.qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10306 
// Single configuration: m=1, n=8, k=16 with cm_stride set to 11 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1);
  tester.nr(8);
  tester.kr(4);
  tester.sr(1);
  tester.m(1);
  tester.n(8);
  tester.k(16);
  tester.cm_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10320 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
10321 
10322 
10323 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Single configuration: m=1, n=8, k=8 on a 1x8 tile (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(1);
  tester.nr(8);
  tester.kr(4);
  tester.sr(1);
  tester.m(1);
  tester.n(8);
  tester.k(8);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
10336 
// Single configuration: m=1, n=8, k=8 with cn_stride set to 11 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(1);
  tester.nr(8);
  tester.kr(4);
  tester.sr(1);
  tester.m(1);
  tester.n(8);
  tester.k(8);
  tester.cn_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
10350 
// Sweeps n over 1..8 and m over 1..1 at k=8 (kr=4, sr=1), one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(1);
      tester.nr(8);
      tester.kr(4);
      tester.sr(1);
      tester.m(rows);
      tester.n(cols);
      tester.k(8);
      tester.iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10368 
// Sweeps m over 1..1 at n=8, k=8 (kr=4, sr=1), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(1);
    tester.nr(8);
    tester.kr(4);
    tester.sr(1);
    tester.m(rows);
    tester.n(8);
    tester.k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10384 
// Sweeps n over 1..8 at m=1, k=8 (kr=4, sr=1), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(1);
    tester.nr(8);
    tester.kr(4);
    tester.sr(1);
    tester.m(1);
    tester.n(cols);
    tester.k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10400 
// Sweeps k over 1..7 at m=1, n=8 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1);
    tester.nr(8);
    tester.kr(4);
    tester.sr(1);
    tester.m(1);
    tester.n(8);
    tester.k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10415 
// Sweeps k over 1..7, n over 1..8 and m over 1..1 (kr=4, sr=1), one iteration
// per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1);
        tester.nr(8);
        tester.kr(4);
        tester.sr(1);
        tester.m(rows);
        tester.n(cols);
        tester.k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10435 
// Sweeps k over 9..15 at m=1, n=8 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1);
    tester.nr(8);
    tester.kr(4);
    tester.sr(1);
    tester.m(1);
    tester.n(8);
    tester.k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10450 
// Sweeps k over 9..15, n over 1..8 and m over 1..1 (kr=4, sr=1), one iteration
// per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1);
        tester.nr(8);
        tester.kr(4);
        tester.sr(1);
        tester.m(rows);
        tester.n(cols);
        tester.k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10470 
// Sweeps k over 16, 24, ..., 80 at m=1, n=8 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1);
    tester.nr(8);
    tester.kr(4);
    tester.sr(1);
    tester.m(1);
    tester.n(8);
    tester.k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10485 
// Sweeps k over 16, 24, ..., 80, n over 1..8 and m over 1..1 (kr=4, sr=1), one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1);
        tester.nr(8);
        tester.kr(4);
        tester.sr(1);
        tester.m(rows);
        tester.n(cols);
        tester.k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10505 
// Sweeps n over 9..15 and k over {1, 10, 19, 28, 37} at m=1 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1);
      tester.nr(8);
      tester.kr(4);
      tester.sr(1);
      tester.m(1);
      tester.n(cols);
      tester.k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10522 
// Sweeps n over 9..15 and k over {1, 10, 19, 28, 37} at m=1 with cn_stride set
// to 11 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1);
      tester.nr(8);
      tester.kr(4);
      tester.sr(1);
      tester.m(1);
      tester.n(cols);
      tester.k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10540 
// Sweeps n over 9..15, k over {1, 10, 19, 28, 37} and m over 1..1 (kr=4, sr=1),
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1);
        tester.nr(8);
        tester.kr(4);
        tester.sr(1);
        tester.m(rows);
        tester.n(cols);
        tester.k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10560 
// Sweeps n over {16, 24} and k over {1, 10, 19, 28, 37} at m=1 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1);
      tester.nr(8);
      tester.kr(4);
      tester.sr(1);
      tester.m(1);
      tester.n(cols);
      tester.k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10577 
// Sweeps n over {16, 24} and k over {1, 10, 19, 28, 37} at m=1 with cn_stride
// set to 11 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1);
      tester.nr(8);
      tester.kr(4);
      tester.sr(1);
      tester.m(1);
      tester.n(cols);
      tester.k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10595 
// Sweeps n over {16, 24}, k over {1, 10, 19, 28, 37} and m over 1..1
// (kr=4, sr=1), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1);
        tester.nr(8);
        tester.kr(4);
        tester.sr(1);
        tester.m(rows);
        tester.n(cols);
        tester.k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10615 
// Sweeps k over {1, 10, 19, 28, 37} at m=1, n=8 with ks set to 3 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1);
    tester.nr(8);
    tester.kr(4);
    tester.sr(1);
    tester.m(1);
    tester.n(8);
    tester.k(depth);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10631 
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..8 and m over 1..1 with ks set
// to 3 (kr=4, sr=1), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1);
        tester.nr(8);
        tester.kr(4);
        tester.sr(1);
        tester.m(rows);
        tester.n(cols);
        tester.k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10652 
// Sweeps n over 9..15 and k over {1, 10, 19, 28, 37} at m=1 with ks set to 3
// (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1);
      tester.nr(8);
      tester.kr(4);
      tester.sr(1);
      tester.m(1);
      tester.n(cols);
      tester.k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10670 
// Sweeps n over {16, 24} and k over {1, 10, 19, 28, 37} at m=1 with ks set to 3
// (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1);
      tester.nr(8);
      tester.kr(4);
      tester.sr(1);
      tester.m(1);
      tester.n(cols);
      tester.k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10688 
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..8 and m over 1..1 with cm_stride
// set to 11 (kr=4, sr=1), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1);
        tester.nr(8);
        tester.kr(4);
        tester.sr(1);
        tester.m(rows);
        tester.n(cols);
        tester.k(depth);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10709 
// Sweeps k over {1, 10, 19, 28, 37} at m=1, n=8 with ks set to 3 and a_offset
// set to 43 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1);
    tester.nr(8);
    tester.kr(4);
    tester.sr(1);
    tester.m(1);
    tester.n(8);
    tester.k(depth);
    tester.ks(3);
    tester.a_offset(43);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10726 
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over 0..0 at m=1, n=8 with
// ks set to 3 and a_offset set to 43 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(1);
      tester.nr(8);
      tester.kr(4);
      tester.sr(1);
      tester.m(1);
      tester.n(8);
      tester.k(depth);
      tester.ks(3);
      tester.a_offset(43);
      tester.zero_index(zero_row);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10746 
// Single configuration: m=1, n=8, k=8 with qmin set to 128 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(1);
  tester.nr(8);
  tester.kr(4);
  tester.sr(1);
  tester.m(1);
  tester.n(8);
  tester.k(8);
  tester.qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
10760 
// Single configuration: m=1, n=8, k=8 with qmax set to 128 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(1);
  tester.nr(8);
  tester.kr(4);
  tester.sr(1);
  tester.m(1);
  tester.n(8);
  tester.k(8);
  tester.qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
10774 
// Single configuration: m=1, n=8, k=8 with cm_stride set to 11 (kr=4, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(1);
  tester.nr(8);
  tester.kr(4);
  tester.sr(1);
  tester.m(1);
  tester.n(8);
  tester.k(8);
  tester.cm_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
10788 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
10789 
10790 
10791 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: m=1, n=8, k=16 on a 1x8 tile (kr=4, sr=2).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1);
  tester.nr(8);
  tester.kr(4);
  tester.sr(2);
  tester.m(1);
  tester.n(8);
  tester.k(16);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10804 
// Single configuration: m=1, n=8, k=16 with cn_stride set to 11 (kr=4, sr=2).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1);
  tester.nr(8);
  tester.kr(4);
  tester.sr(2);
  tester.m(1);
  tester.n(8);
  tester.k(16);
  tester.cn_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10818 
// Sweeps n over 1..8 and m over 1..1 at k=16 (kr=4, sr=2), one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(1);
      tester.nr(8);
      tester.kr(4);
      tester.sr(2);
      tester.m(rows);
      tester.n(cols);
      tester.k(16);
      tester.iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10836 
// Sweeps m over 1..1 at n=8, k=16 (kr=4, sr=2), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(1);
    tester.nr(8);
    tester.kr(4);
    tester.sr(2);
    tester.m(rows);
    tester.n(8);
    tester.k(16);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10852 
// Sweeps n over 1..8 at m=1, k=16 (kr=4, sr=2), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(1);
    tester.nr(8);
    tester.kr(4);
    tester.sr(2);
    tester.m(1);
    tester.n(cols);
    tester.k(16);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10868 
// Sweeps k over 1..15 at m=1, n=8 (kr=4, sr=2).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1);
    tester.nr(8);
    tester.kr(4);
    tester.sr(2);
    tester.m(1);
    tester.n(8);
    tester.k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10883 
// Sweeps k over 1..15, n over 1..8 and m over 1..1 (kr=4, sr=2), one iteration
// per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1);
        tester.nr(8);
        tester.kr(4);
        tester.sr(2);
        tester.m(rows);
        tester.n(cols);
        tester.k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10903 
// Sweeps k over 17..31 at m=1, n=8 (kr=4, sr=2).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1);
    tester.nr(8);
    tester.kr(4);
    tester.sr(2);
    tester.m(1);
    tester.n(8);
    tester.k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10918 
// Sweeps k over 17..31, n over 1..8 and m over 1..1 (kr=4, sr=2), one iteration
// per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1);
        tester.nr(8);
        tester.kr(4);
        tester.sr(2);
        tester.m(rows);
        tester.n(cols);
        tester.k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10938 
// Sweeps k over 32, 48, ..., 160 at m=1, n=8 (kr=4, sr=2).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester tester;
    tester.mr(1);
    tester.nr(8);
    tester.kr(4);
    tester.sr(2);
    tester.m(1);
    tester.n(8);
    tester.k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10953 
// Sweeps k over 32, 48, ..., 160, n over 1..8 and m over 1..1 (kr=4, sr=2), one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1);
        tester.nr(8);
        tester.kr(4);
        tester.sr(2);
        tester.m(rows);
        tester.n(cols);
        tester.k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10973 
// 1x8c4s2 NEON MLAL kernel: m = 1, n in [9, 16), k stepping by 17 from 1 to 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10990 
// 1x8c4s2 NEON MLAL kernel: m = 1, n in [9, 16), k stepping by 17 from 1 to 80,
// with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11008 
// 1x8c4s2 NEON MLAL kernel: n in [9, 16), k stepping by 17 from 1 to 80,
// m in [1, 1]; every configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11028 
// 1x8c4s2 NEON MLAL kernel: m = 1, n stepping by 8 from 16 to 24,
// k stepping by 17 from 1 to 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11045 
// 1x8c4s2 NEON MLAL kernel: m = 1, n stepping by 8 from 16 to 24,
// k stepping by 17 from 1 to 80, with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11063 
// 1x8c4s2 NEON MLAL kernel: n stepping by 8 from 16 to 24, k stepping by 17
// from 1 to 80, m in [1, 1]; every configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11083 
// 1x8c4s2 NEON MLAL kernel: m = 1, n = 8, k stepping by 17 from 1 to 80,
// with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11099 
// 1x8c4s2 NEON MLAL kernel: k stepping by 17 from 1 to 80, n in [1, 8],
// m in [1, 1], with ks(3) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11120 
// 1x8c4s2 NEON MLAL kernel: m = 1, n in [9, 16), k stepping by 17 from 1 to 80,
// with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11138 
// 1x8c4s2 NEON MLAL kernel: m = 1, n stepping by 8 from 16 to 24,
// k stepping by 17 from 1 to 80, with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11156 
// 1x8c4s2 NEON MLAL kernel: k stepping by 17 from 1 to 80, n in [1, 8],
// m in [1, 1], with cm_stride(11) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11177 
// 1x8c4s2 NEON MLAL kernel: m = 1, n = 8, k stepping by 17 from 1 to 80,
// with ks(3) and a_offset(83).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11194 
// 1x8c4s2 NEON MLAL kernel: m = 1, n = 8, k stepping by 17 from 1 to 80,
// with ks(3), a_offset(83) and zero_index taking each value in [0, 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11214 
// 1x8c4s2 NEON MLAL kernel: m = 1, n = 8, k = 16, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(2)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11228 
// 1x8c4s2 NEON MLAL kernel: m = 1, n = 8, k = 16, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(2)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11242 
// 1x8c4s2 NEON MLAL kernel: m = 1, n = 8, k = 16, with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(2)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11256 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
11257 
11258 
11259 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 1x8c8 NEON MLAL kernel: m = 1, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11272 
// 1x8c8 NEON MLAL kernel: m = 1, n = 8, k = 16, with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11286 
// 1x8c8 NEON MLAL kernel: k = 16, n in [1, 8], m in [1, 1];
// every configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11304 
// 1x8c8 NEON MLAL kernel: n = 8, k = 16, m in [1, 1], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11320 
// 1x8c8 NEON MLAL kernel: m = 1, k = 16, n in [1, 8], with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(n_size).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11336 
// 1x8c8 NEON MLAL kernel: m = 1, n = 8, k in [1, 16).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11351 
// 1x8c8 NEON MLAL kernel: k in [1, 16), n in [1, 8], m in [1, 1];
// every configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11371 
// 1x8c8 NEON MLAL kernel: m = 1, n = 8, k in [17, 32).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11386 
// 1x8c8 NEON MLAL kernel: k in [17, 32), n in [1, 8], m in [1, 1];
// every configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11406 
// 1x8c8 NEON MLAL kernel: m = 1, n = 8, k stepping by 16 from 32 to 160.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11421 
// 1x8c8 NEON MLAL kernel: k stepping by 16 from 32 to 160, n in [1, 8],
// m in [1, 1]; every configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11441 
// 1x8c8 NEON MLAL kernel: m = 1, n in [9, 16), k stepping by 17 from 1 to 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11458 
// 1x8c8 NEON MLAL kernel: m = 1, n in [9, 16), k stepping by 17 from 1 to 80,
// with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11476 
// 1x8c8 NEON MLAL kernel: n in [9, 16), k stepping by 17 from 1 to 80,
// m in [1, 1]; every configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11496 
// 1x8c8 NEON MLAL kernel: m = 1, n stepping by 8 from 16 to 24,
// k stepping by 17 from 1 to 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11513 
// 1x8c8 NEON MLAL kernel: m = 1, n stepping by 8 from 16 to 24,
// k stepping by 17 from 1 to 80, with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11531 
// 1x8c8 NEON MLAL kernel: n stepping by 8 from 16 to 24, k stepping by 17
// from 1 to 80, m in [1, 1]; every configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11551 
// 1x8c8 NEON MLAL kernel: m = 1, n = 8, k stepping by 17 from 1 to 80,
// with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11567 
// 1x8c8 NEON MLAL kernel: k stepping by 17 from 1 to 80, n in [1, 8],
// m in [1, 1], with ks(3) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11588 
// 1x8c8 NEON MLAL kernel: m = 1, n in [9, 16), k stepping by 17 from 1 to 80,
// with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11606 
// 1x8c8 NEON MLAL kernel: m = 1, n stepping by 8 from 16 to 24,
// k stepping by 17 from 1 to 80, with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11624 
// 1x8c8 NEON MLAL kernel: k stepping by 17 from 1 to 80, n in [1, 8],
// m in [1, 1], with cm_stride(11) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11645 
// 1x8c8 NEON MLAL kernel: m = 1, n = 8, k stepping by 17 from 1 to 80,
// with ks(3) and a_offset(83).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11662 
// 1x8c8 NEON MLAL kernel: m = 1, n = 8, k stepping by 17 from 1 to 80,
// with ks(3), a_offset(83) and zero_index taking each value in [0, 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11682 
// 1x8c8 NEON MLAL kernel: m = 1, n = 8, k = 16, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11696 
// 1x8c8 NEON MLAL kernel: m = 1, n = 8, k = 16, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11710 
// 1x8c8 NEON MLAL kernel: m = 1, n = 8, k = 16, with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11724 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
11725 
11726 
11727 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 1x16 NEON MLAL-lane PRFM kernel: m = 1, n = 16, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11740 
// 1x16 NEON MLAL-lane PRFM kernel: m = 1, n = 16, k = 8, with cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11754 
// 1x16 NEON MLAL-lane PRFM kernel: k = 8, n in [1, 16], m in [1, 1];
// every configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11772 
// 1x16 NEON MLAL-lane PRFM kernel: n = 16, k = 8, m in [1, 1],
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11788 
// 1x16 NEON MLAL-lane PRFM kernel: m = 1, k = 8, n in [1, 16],
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11804 
// 1x16 NEON MLAL-lane PRFM kernel: m = 1, n = 16, k in [1, 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11819 
// 1x16 NEON MLAL-lane PRFM kernel: k in [1, 8), n in [1, 16], m in [1, 1];
// every configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11839 
// Full 1x16 output (m = 1, n = 16) with k taking every value in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
11854 
// Sweeps k over [9, 15], n over [1, 16] and m over [1, 1]; every combination
// is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11874 
// Full 1x16 output (m = 1, n = 16) with k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
11889 
// Sweeps k = 16, 24, ..., 80, n over [1, 16] and m over [1, 1]; every
// combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11909 
// n takes every value in [17, 31] (above nr = 16); for each n, k runs over
// 1, 10, 19, 28, 37 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11926 
// Same n in [17, 31] / k in {1, 10, 19, 28, 37} sweep as n_gt_16, with
// cn_stride(19) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11944 
// Sweeps n over [17, 31], k over 1, 10, 19, 28, 37 and m over [1, 1]; every
// combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
11964 
// n = 32 and n = 48 (multiples of nr = 16); for each n, k runs over
// 1, 10, 19, 28, 37 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11981 
// Same n in {32, 48} / k in {1, 10, 19, 28, 37} sweep as n_div_16, with
// cn_stride(19) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
11999 
// Sweeps n over {32, 48}, k over 1, 10, 19, 28, 37 and m over [1, 1]; every
// combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12019 
// Full 1x16 output (m = 1, n = 16) with ks(3); k runs over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_dim)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
12035 
// Sweeps k over 1, 10, 19, 28, 37, n over [1, 16] and m over [1, 1] with
// ks(3); every combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12056 
// n takes every value in [17, 31] and k runs over 1, 10, 19, 28, 37, with
// m = 1 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12074 
// n = 32 and n = 48, k over 1, 10, 19, 28, 37, with m = 1 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12092 
// Sweeps k over 1, 10, 19, 28, 37, n over [1, 16] and m over [1, 1] with
// cm_stride(19); every combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12113 
// Full 1x16 output (m = 1, n = 16) with ks(3) and a_offset(43); k runs over
// 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_dim)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
12130 
// Same setup as a_offset (ks(3), a_offset(43), m = 1, n = 16), additionally
// passing each index in [0, 1) to zero_index(); k runs over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(16).k(k_dim)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12150 
// Single run on a full 1x16 output with k = 8 and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
12164 
// Single run on a full 1x16 output with k = 8 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
12178 
// Single run on a full 1x16 output with k = 8 and cm_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
12192 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
12193 
12194 
12195 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run on a full 1x16 output (m = 1, n = 16) with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
12208 
// Single run on a full 1x16 output with k = 8 and cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
12222 
// k = 8 is fixed; n takes every value in [1, 16] and m every value in [1, 1].
// Each combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12240 
// n = 16 and k = 8 are fixed; m takes every value in [1, 1] (a single value).
// Each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(m_dim).n(16).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12256 
// m = 1 and k = 8 are fixed; n takes every value in [1, 16].
// Each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12272 
// Full 1x16 output (m = 1, n = 16) with k taking every value in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12287 
// Sweeps k over [1, 7], n over [1, 16] and m over [1, 1]; every combination is
// run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12307 
// Full 1x16 output (m = 1, n = 16) with k taking every value in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12322 
// Sweeps k over [9, 15], n over [1, 16] and m over [1, 1]; every combination
// is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12342 
// Full 1x16 output (m = 1, n = 16) with k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12357 
// Sweeps k = 16, 24, ..., 80, n over [1, 16] and m over [1, 1]; every
// combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12377 
// n takes every value in [17, 31] (above nr = 16); for each n, k runs over
// 1, 10, 19, 28, 37 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12394 
// Same n in [17, 31] / k in {1, 10, 19, 28, 37} sweep as n_gt_16, with
// cn_stride(19) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12412 
// Sweeps n over [17, 31], k over 1, 10, 19, 28, 37 and m over [1, 1]; every
// combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12432 
// n = 32 and n = 48 (multiples of nr = 16); for each n, k runs over
// 1, 10, 19, 28, 37 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12449 
// Same n in {32, 48} / k in {1, 10, 19, 28, 37} sweep as n_div_16, with
// cn_stride(19) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12467 
// Sweeps n over {32, 48}, k over 1, 10, 19, 28, 37 and m over [1, 1]; every
// combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12487 
// Full 1x16 output (m = 1, n = 16) with ks(3); k runs over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_dim)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12503 
// Sweeps k over 1, 10, 19, 28, 37, n over [1, 16] and m over [1, 1] with
// ks(3); every combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12524 
// n takes every value in [17, 31] and k runs over 1, 10, 19, 28, 37, with
// m = 1 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12542 
// n = 32 and n = 48, k over 1, 10, 19, 28, 37, with m = 1 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12560 
// Sweeps k over 1, 10, 19, 28, 37, n over [1, 16] and m over [1, 1] with
// cm_stride(19); every combination is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
12581 
// Full 1x16 output (m = 1, n = 16) with ks(3) and a_offset(43); k runs over
// 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(1).n(16).k(k_dim)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
12598 
// Same setup as a_offset (ks(3), a_offset(43), m = 1, n = 16), additionally
// passing each index in [0, 1) to zero_index(); k runs over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(1).sr(1)
          .m(1).n(16).k(k_dim)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
12618 
// Single run on a full 1x16 output with k = 8 and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
12632 
// Single run on a full 1x16 output with k = 8 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
12646 
// Single run on a full 1x16 output with k = 8 and cm_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(16).kr(1).sr(1)
      .m(1).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
12660 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
12661 
12662 
12663 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// k_eq_8: full 1x16 tile (kr=4) at k=8 with all other tester settings left at
// their defaults.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
12676 
// strided_cn: full 1x16 tile at k=8 with cn_stride set to 19 (larger than nr=16).
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
12690 
// k_eq_8_subtile: at k=8, runs every output size with n in [1, 16] and m in
// [1, 1]; each combination uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
12708 
// k_eq_8_subtile_m: at k=8 and n=16, runs every m in [1, 1] with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12724 
// k_eq_8_subtile_n: at k=8 and m=1, runs every n in [1, 16] with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12740 
// k_lt_8: full 1x16 tile for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12755 
// k_lt_8_subtile: for every k in [1, 7], runs each output size with n in
// [1, 16] and m in [1, 1]; each combination uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12775 
// k_gt_8: full 1x16 tile for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12790 
// k_gt_8_subtile: for every k in [9, 15], runs each output size with n in
// [1, 16] and m in [1, 1]; each combination uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12810 
// k_div_8: full 1x16 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12825 
// k_div_8_subtile: for k = 16, 24, ..., 80, runs each output size with n in
// [1, 16] and m in [1, 1]; each combination uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12845 
// n_gt_16: for every n in [17, 31] (above nr=16) and k in {1, 10, 19, 28, 37},
// runs the kernel with m=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
12862 
// n_gt_16_strided_cn: same n in [17, 31] and k in {1, 10, 19, 28, 37} sweep as
// n_gt_16, with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
12880 
// n_gt_16_subtile: for n in [17, 31], k in {1, 10, 19, 28, 37} and m in [1, 1],
// runs the kernel with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12900 
// n_div_16: for n in {32, 48} (multiples of nr=16) and k in {1, 10, 19, 28, 37},
// runs the kernel with m=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
12917 
// n_div_16_strided_cn: same n in {32, 48} and k in {1, 10, 19, 28, 37} sweep as
// n_div_16, with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
12935 
// n_div_16_subtile: for n in {32, 48}, k in {1, 10, 19, 28, 37} and m in [1, 1],
// runs the kernel with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12955 
// small_kernel: full 1x16 tile with ks=3 for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12971 
// small_kernel_subtile: with ks=3, for k in {1, 10, 19, 28, 37}, runs each
// output size with n in [1, 16] and m in [1, 1]; each uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12992 
// n_gt_16_small_kernel: with ks=3, for n in [17, 31] and k in
// {1, 10, 19, 28, 37}, runs the kernel with m=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13010 
// n_div_16_small_kernel: with ks=3, for n in {32, 48} and k in
// {1, 10, 19, 28, 37}, runs the kernel with m=1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13028 
// strided_cm_subtile: with cm_stride=19, for k in {1, 10, 19, 28, 37}, runs each
// output size with n in [1, 16] and m in [1, 1]; each uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13049 
// a_offset: full 1x16 tile with ks=3 and a_offset=43 for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
13066 
// zero: for k in {1, 10, 19, 28, 37} and each mz in [0, 1), runs the full 1x16
// tile with ks=3 and a_offset=43 while zero_index is set to mz.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(1)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13086 
// qmin: full 1x16 tile at k=8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13100 
// qmax: full 1x16 tile at k=8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13114 
// strided_cm: full 1x16 tile at k=8 with cm_stride set to 19 (larger than nr=16).
TEST(QC8_IGEMM_MINMAX_FP32_1X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13128 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
13129 
13130 
13131 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_8: full 2x8 tile at k=8 with all other tester settings left at their
// defaults.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
13144 
// strided_cn: full 2x8 tile at k=8 with cn_stride set to 11 (larger than nr=8).
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
13158 
// k_eq_8_subtile: at k=8, runs every output size with n in [1, 8] and m in
// [1, 2]; each combination uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
13176 
// k_eq_8_subtile_m: at k=8 and n=8, runs every m in [1, 2] with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
13192 
// k_eq_8_subtile_n: at k=8 and m=2, runs every n in [1, 8] with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
13208 
// k_lt_8: full 2x8 tile for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
13223 
// k_lt_8_subtile: for every k in [1, 7], runs each output size with n in [1, 8]
// and m in [1, 2]; each combination uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13243 
// k_gt_8: full 2x8 tile for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
13258 
// k_gt_8_subtile: for every k in [9, 15], runs each output size with n in
// [1, 8] and m in [1, 2]; each combination uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13278 
// k_div_8: full 2x8 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
13293 
// k_div_8_subtile: for k = 16, 24, ..., 80, runs each output size with n in
// [1, 8] and m in [1, 2]; each combination uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13313 
// n_gt_8: for every n in [9, 15] (above nr=8) and k in {1, 10, 19, 28, 37},
// runs the kernel with m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
13330 
// n_gt_8_strided_cn: same n in [9, 15] and k in {1, 10, 19, 28, 37} sweep as
// n_gt_8, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
13348 
// n_gt_8_subtile: for n in [9, 15], k in {1, 10, 19, 28, 37} and m in [1, 2],
// runs the kernel with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13368 
// n_div_8: for n in {16, 24} (multiples of nr=8) and k in {1, 10, 19, 28, 37},
// runs the kernel with m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
13385 
// n_div_8_strided_cn: same n in {16, 24} and k in {1, 10, 19, 28, 37} sweep as
// n_div_8, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
13403 
// n_div_8_subtile: for n in {16, 24}, k in {1, 10, 19, 28, 37} and m in [1, 2],
// runs the kernel with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13423 
// small_kernel: full 2x8 tile with ks=3 for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
13439 
// small_kernel_subtile: with ks=3, for k in {1, 10, 19, 28, 37}, runs each
// output size with n in [1, 8] and m in [1, 2]; each uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13460 
// n_gt_8_small_kernel: with ks=3, for n in [9, 15] and k in {1, 10, 19, 28, 37},
// runs the kernel with m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
13478 
// n_div_8_small_kernel: with ks=3, for n in {16, 24} and k in
// {1, 10, 19, 28, 37}, runs the kernel with m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
13496 
// strided_cm_subtile: with cm_stride=11, for k in {1, 10, 19, 28, 37}, runs each
// output size with n in [1, 8] and m in [1, 2]; each uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13517 
// Full 2x8 tile with ks = 3 and a_offset = 83, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(k_dim)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
13534 
// Same setup as the a_offset case (ks = 3, a_offset = 83), additionally
// trying zero_index = 0 and 1 for each k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(8).k(k_dim)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13554 
// Full 2x8 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
13568 
// Full 2x8 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
13582 
// Full 2x8 tile at k = 8 with cm_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
13596 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
13597 
13598 
13599 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 2x8 tile (kr = 2) at k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
13612 
// Full 2x8 tile at k = 16 with cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
13626 
// k = 16 for every (m, n) with n in 1..8 and m in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(16)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13644 
// k = 16, n = 8, with m varied over 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(m_dim).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
13660 
// k = 16, m = 2, with n varied over 1..8; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_dim).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
13676 
// Full 2x8 tile for every k in 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_dim)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
13691 
// Every k in 1..15 combined with n in 1..8 and m in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13711 
// Full 2x8 tile for every k in 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim <= 31; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_dim)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
13726 
// Every k in 17..31 combined with n in 1..8 and m in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim <= 31; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13746 
// Full 2x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_dim)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
13761 
// k = 32, 48, ..., 160 combined with n in 1..8 and m in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13781 
// Sweeps n over 9..15 (above nr = 8) and k over {1, 18, 35, 52, 69} with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13798 
// n in 9..15 and k in {1, 18, 35, 52, 69} with m = 2 and cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13816 
// n in 9..15, k in {1, 18, 35, 52, 69}, m in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13836 
// n in {16, 24} (multiples of nr = 8) and k in {1, 18, 35, 52, 69} with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13853 
// n in {16, 24} and k in {1, 18, 35, 52, 69} with m = 2 and cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13871 
// n in {16, 24}, k in {1, 18, 35, 52, 69}, m in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13891 
// Full 2x8 tile with ks = 3, for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_dim)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
13907 
// ks = 3 for k in {1, 18, 35, 52, 69}, n in 1..8 and m in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13928 
// n in 9..15 and k in {1, 18, 35, 52, 69} with m = 2 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13946 
// n in {16, 24} and k in {1, 18, 35, 52, 69} with m = 2 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13964 
// cm_stride = 11 for k in {1, 18, 35, 52, 69}, n in 1..8 and m in 1..2;
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13985 
// Full 2x8 tile with ks = 3 and a_offset = 163, for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_dim)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
14002 
// ks = 3 and a_offset = 163 with zero_index = 0 and 1, for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(8).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14022 
// Full 2x8 tile at k = 16 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
14036 
// Full 2x8 tile at k = 16 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
14050 
// Full 2x8 tile at k = 16 with cm_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
14064 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
14065 
14066 
14067 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 2x8 tile (kr = 2) at k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
14080 
// Full 2x8 tile at k = 16 with cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
14094 
// k = 16 for every (m, n) with n in 1..8 and m in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(16)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14112 
// k = 16, n = 8, with m varied over 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(m_dim).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
14128 
// k = 16, m = 2, with n varied over 1..8; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_dim).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
14144 
// Full 2x8 tile for every k in 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_dim)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
14159 
// Every k in 1..15 combined with n in 1..8 and m in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14179 
// Full 2x8 tile for every k in 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim <= 31; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_dim)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
14194 
// Every k in 17..31 combined with n in 1..8 and m in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim <= 31; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14214 
// Full 2x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_dim)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
14229 
// k = 32, 48, ..., 160 combined with n in 1..8 and m in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14249 
// Sweeps n over 9..15 (above nr = 8) and k over {1, 18, 35, 52, 69} with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14266 
// n in 9..15 and k in {1, 18, 35, 52, 69} with m = 2 and cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14284 
// n in 9..15, k in {1, 18, 35, 52, 69}, m in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14304 
// n in {16, 24} (multiples of nr = 8) and k in {1, 18, 35, 52, 69} with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14321 
// n in {16, 24} and k in {1, 18, 35, 52, 69} with m = 2 and cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14339 
// n in {16, 24}, k in {1, 18, 35, 52, 69}, m in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14359 
// m = 2, n = 8, k over 1, 18, 35, 52, 69, with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(kc);
    tester.ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
14375 
// k over 1, 18, 35, 52, 69, n = 1..8, m = 1..2, with ks(3); one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
14396 
// m = 2, n = 9..15, k over 1, 18, 35, 52, 69, with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14414 
// m = 2, n = 16 and 24, k over 1, 18, 35, 52, 69, with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14432 
// k over 1, 18, 35, 52, 69, n = 1..8, m = 1..2, with cm_stride(11); one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(11).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
14453 
// m = 2, n = 8, k over 1, 18, 35, 52, 69, with ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(kc);
    tester.ks(3).a_offset(163);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
14470 
// m = 2, n = 8, k over 1, 18, 35, 52, 69, with ks(3), a_offset(163)
// and zero_index set to 0 and then 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(8).k(kc);
      tester.ks(3).a_offset(163).zero_index(zi);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14490 
// m = 2, n = 8, k = 16, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
14504 
// m = 2, n = 8, k = 16, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
14518 
// m = 2, n = 8, k = 16, with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
14532 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
14533 
14534 
14535 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: m = 2, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
14548 
// m = 2, n = 8, k = 16, with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
14562 
// k = 16 with n = 1..8 and m = 1..2; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(mc).n(nc).k(16);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14580 
// k = 16, n = 8, with m = 1..2; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(mc).n(8).k(16);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
14596 
// k = 16, m = 2, with n = 1..8; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(nc).k(16);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
14612 
// m = 2, n = 8, with every k from 1 to 15.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(kc);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
14627 
// k = 1..15, n = 1..8, m = 1..2; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
14647 
// m = 2, n = 8, with every k from 17 to 31.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(kc);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
14662 
// k = 17..31, n = 1..8, m = 1..2; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
14682 
// m = 2, n = 8, with k = 32, 48, ..., 160 (step 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(kc);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
14697 
// k = 32, 48, ..., 160 (step 16), n = 1..8, m = 1..2; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
14717 
// m = 2, n = 9..15, k over 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14734 
// m = 2, n = 9..15, k over 1, 18, 35, 52, 69, with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14752 
// n = 9..15, k over 1, 18, 35, 52, 69, m = 1..2; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
14772 
// m = 2 with n = 16 and 24, sweeping k over 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14789 
// m = 2 with n = 16 and 24, k over 1, 18, 35, 52, 69, and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14807 
// n = 16 and 24, k over 1, 18, 35, 52, 69, and m = 1..2; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
14827 
// m = 2, n = 8, k over 1, 18, 35, 52, 69, with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(kc);
    tester.ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
14843 
// k over 1, 18, 35, 52, 69, n = 1..8, m = 1..2, with ks(3); one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
14864 
// m = 2, n = 9..15, k over 1, 18, 35, 52, 69, with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14882 
// m = 2, n = 16 and 24, k over 1, 18, 35, 52, 69, with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14900 
// k over 1, 18, 35, 52, 69, n = 1..8, m = 1..2, with cm_stride(11); one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(11).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
14921 
// m = 2, n = 8, k over 1, 18, 35, 52, 69, with ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(kc);
    tester.ks(3).a_offset(163);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
14938 
// m = 2, n = 8, k over 1, 18, 35, 52, 69, with ks(3), a_offset(163)
// and zero_index set to 0 and then 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(8).k(kc);
      tester.ks(3).a_offset(163).zero_index(zi);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
14958 
// m = 2, n = 8, k = 16, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
14972 
// m = 2, n = 8, k = 16, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
14986 
// m = 2, n = 8, k = 16, with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
15000 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
15001 
15002 
15003 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: m = 2, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(4).sr(1);
  tester.m(2).n(8).k(16);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
15016 
// m = 2, n = 8, k = 16, with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(4).sr(1);
  tester.m(2).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
15030 
// k = 16 with n = 1..8 and m = 1..2; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(4).sr(1);
      tester.m(mc).n(nc).k(16);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
15048 
// k = 16, n = 8, with m = 1..2; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(4).sr(1);
    tester.m(mc).n(8).k(16);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
15064 
// k = 16, m = 2, with n = 1..8; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(4).sr(1);
    tester.m(2).n(nc).k(16);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
15080 
// m = 2, n = 8, with every k from 1 to 15.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(4).sr(1);
    tester.m(2).n(8).k(kc);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
15095 
// k = 1..15, n = 1..8, m = 1..2; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
15115 
// m = 2, n = 8, with every k from 17 to 31.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(4).sr(1);
    tester.m(2).n(8).k(kc);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
15130 
// k = 17..31, n = 1..8, m = 1..2; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
15150 
// m = 2, n = 8, with k = 32, 48, ..., 160 (step 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(4).sr(1);
    tester.m(2).n(8).k(kc);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
15165 
// k = 32, 48, ..., 160 (step 16), n = 1..8, m = 1..2; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
15185 
// m = 2, n = 9..15, k over 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(4).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
15202 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Same sweep as n_gt_8 (n in 9..15, k in 1, 18, ..., 69) with cn_stride set to 11.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15220 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in 9..15, k in 1, 18, ..., 69, and m in 1..2; one iteration per configuration.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15240 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // n takes the multiples of 8 in [16, 24]; k takes 1, 18, 35, 52, 69.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15257 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24}, k in 1, 18, ..., 69, with cn_stride set to 11.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15275 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24}, k in 1, 18, ..., 69, and m in 1..2; one iteration per configuration.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15295 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr, n == nr, ks fixed at 3; k takes 1, 18, 35, 52, 69.
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
15311 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // ks fixed at 3; sweeps k in 1, 18, ..., 69, n in 1..8 and m in 1..2,
  // one iteration per configuration.
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15332 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n in 9..15 and k in 1, 18, ..., 69, with ks fixed at 3.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15350 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} and k in 1, 18, ..., 69, with ks fixed at 3.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15368 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // cm_stride set to 11; sweeps k in 1, 18, ..., 69, n in 1..8 and m in 1..2,
  // one iteration per configuration.
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15389 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3 and a_offset = 163, with m == mr and n == nr; k takes 1, 18, ..., 69.
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
15406 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3 and a_offset = 163; zero_index is set to 0 and then 1 for each k
  // in 1, 18, ..., 69.
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15426 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration (m=2, n=8, k=16) with qmin set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
15440 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration (m=2, n=8, k=16) with qmax set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
15454 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration (m=2, n=8, k=16) with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
15468 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
15469 
15470 
15471 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration: m == mr, n == nr, k == 16.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
15484 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration (m=2, n=8, k=16) with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
15498 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k fixed at 16; every (n, m) with n in 1..8 and m in 1..2, one iteration each.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15516 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // n == nr and k == 16; only m varies (1..2), one iteration each.
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
15532 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr and k == 16; only n varies (1..8), one iteration each.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
15548 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr and n == nr; k runs over every value in 1..15.
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
15563 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k in 1..15, n in 1..8 and m in 1..2; one iteration per configuration.
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15583 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr and n == nr; k runs over every value in 17..31.
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
15598 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k in 17..31, n in 1..8 and m in 1..2; one iteration per configuration.
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15618 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr and n == nr; k takes the values 32, 48, ..., 160.
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
15633 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k in 32, 48, ..., 160, n in 1..8 and m in 1..2; one iteration per configuration.
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15653 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // n runs over 9..15 (larger than nr == 8); k takes 1, 18, 35, 52, 69.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15670 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n in 9..15 and k in 1, 18, ..., 69, with cn_stride set to 11.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15688 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in 9..15, k in 1, 18, ..., 69, and m in 1..2; one iteration per configuration.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15708 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // n takes the multiples of 8 in [16, 24]; k takes 1, 18, 35, 52, 69.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15725 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24}, k in 1, 18, ..., 69, with cn_stride set to 11.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15743 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24}, k in 1, 18, ..., 69, and m in 1..2; one iteration per configuration.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15763 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr, n == nr, ks fixed at 3; k takes 1, 18, 35, 52, 69.
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
15779 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // ks fixed at 3; sweeps k in 1, 18, ..., 69, n in 1..8 and m in 1..2,
  // one iteration per configuration.
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15800 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n in 9..15 and k in 1, 18, ..., 69, with ks fixed at 3.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15818 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} and k in 1, 18, ..., 69, with ks fixed at 3.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15836 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // cm_stride set to 11; sweeps k in 1, 18, ..., 69, n in 1..8 and m in 1..2,
  // one iteration per configuration.
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15857 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3 and a_offset = 163, with m == mr and n == nr; k takes 1, 18, ..., 69.
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
15874 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3 and a_offset = 163; zero_index is set to 0 and then 1 for each k
  // in 1, 18, ..., 69.
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15894 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration (m=2, n=8, k=16) with qmin set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
15908 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration (m=2, n=8, k=16) with qmax set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
15922 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Single configuration (m=2, n=8, k=16) with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
15936 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
15937 
15938 
15939 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single configuration: m == mr, n == nr, k == 16.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
15952 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single configuration (m=2, n=8, k=16) with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
15966 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k fixed at 16; every (n, m) with n in 1..8 and m in 1..2, one iteration each.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15984 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n == nr and k == 16; only m varies (1..2), one iteration each.
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16000 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m == mr and k == 16; only n varies (1..8), one iteration each.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16016 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m == mr and n == nr; k runs over every value in 1..15.
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16031 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in 1..15, n in 1..8 and m in 1..2; one iteration per configuration.
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16051 
// m = 2, n = 8; k is swept over 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(kc)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16066 
// k in 17..31 (outer), n in 1..8, m in 1..2 (inner); iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16086 
// m = 2, n = 8; k takes the multiples of 16 from 32 through 160.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(kc)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16101 
// k in {32, 48, ..., 160} (outer), n in 1..8, m in 1..2 (inner); iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16121 
// m = 2; n in 9..15 (outer), k in {1, 18, 35, 52, 69} (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16138 
// m = 2, cn_stride(11); n in 9..15 (outer), k in {1, 18, 35, 52, 69} (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16156 
// n in 9..15 (outer), k in {1, 18, 35, 52, 69}, m in 1..2 (inner); iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16176 
// m = 2; n in {16, 24} (outer), k in {1, 18, 35, 52, 69} (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16193 
// m = 2, cn_stride(11); n in {16, 24} (outer), k in {1, 18, 35, 52, 69} (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16211 
// n in {16, 24} (outer), k in {1, 18, 35, 52, 69}, m in 1..2 (inner); iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16231 
// m = 2, n = 8, ks(3); k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16247 
// ks(3); k in {1, 18, 35, 52, 69} (outer), n in 1..8, m in 1..2 (inner); iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16268 
// m = 2, ks(3); n in 9..15 (outer), k in {1, 18, 35, 52, 69} (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16286 
// m = 2, ks(3); n in {16, 24} (outer), k in {1, 18, 35, 52, 69} (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16304 
// cm_stride(11); k in {1, 18, 35, 52, 69} (outer), n in 1..8, m in 1..2 (inner); iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16325 
// m = 2, n = 8, ks(3), a_offset(163); k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16342 
// m = 2, n = 8, ks(3), a_offset(163); k in {1, 18, 35, 52, 69} (outer),
// zero_index in 0..1 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(8).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zi)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16362 
// Single run at m = 2, n = 8, k = 16 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
16376 
// Single run at m = 2, n = 8, k = 16 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
16390 
// Single run at m = 2, n = 8, k = 16 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
16404 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
16405 
16406 
16407 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at m = 2, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
16420 
// Single run at m = 2, n = 8, k = 16 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
16434 
// k = 16; n in 1..8 (outer), m in 1..2 (inner); iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16452 
// k = 16, n = 8; m is swept over 1..2, each configuration with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(mc).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16468 
// k = 16, m = 2; n is swept over 1..8, each configuration with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16484 
// m = 2, n = 8; k is swept over 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(kc)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16499 
// k in 1..15 (outer), n in 1..8, m in 1..2 (inner); iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16519 
// m = 2, n = 8; k is swept over 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(kc)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16534 
// k in 17..31 (outer), n in 1..8, m in 1..2 (inner); iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16554 
// m = 2, n = 8; k takes the multiples of 16 from 32 through 160.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(kc)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16569 
// k in {32, 48, ..., 160} (outer), n in 1..8, m in 1..2 (inner); iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16589 
// m = 2; n in 9..15 (outer), k in {1, 18, 35, 52, 69} (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16606 
// m = 2, cn_stride(11); n in 9..15 (outer), k in {1, 18, 35, 52, 69} (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16624 
// n in 9..15 (outer), k in {1, 18, 35, 52, 69}, m in 1..2 (inner); iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16644 
// m = 2; n in {16, 24} (outer), k in {1, 18, 35, 52, 69} (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16661 
// m = 2, cn_stride(11); n in {16, 24} (outer), k in {1, 18, 35, 52, 69} (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16679 
// n in {16, 24} (outer), k in {1, 18, 35, 52, 69}, m in 1..2 (inner); iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16699 
// m = 2, n = 8, ks(3); k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16715 
// ks(3); k in {1, 18, 35, 52, 69} (outer), n in 1..8, m in 1..2 (inner); iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16736 
// m = 2, ks(3); n in 9..15 (outer), k in {1, 18, 35, 52, 69} (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16754 
// m = 2, ks(3); n in {16, 24} (outer), k in {1, 18, 35, 52, 69} (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16772 
// cm_stride(11); k in {1, 18, 35, 52, 69} (outer), n in 1..8, m in 1..2 (inner); iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16793 
// m = 2, n = 8, ks(3), a_offset(163); k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
16810 
// m = 2, n = 8, ks(3), a_offset(163); k in {1, 18, 35, 52, 69} (outer),
// zero_index in 0..1 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(2).n(8).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zi)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16830 
// Single run at m = 2, n = 8, k = 16 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
16844 
// Single run at m = 2, n = 8, k = 16 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
16858 
// Single run at m = 2, n = 8, k = 16 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
16872 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
16873 
16874 
16875 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // Single run: m=2, n=8, k=16.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(4).sr(2);
  tester.m(2).n(8).k(16);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
16888 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Single run: m=2, n=8, k=16 with cn_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(4).sr(2);
  tester.m(2).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
16902 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // Every (n, m) with n in 1..8 and m in 1..2 at k=16; one iteration per combination.
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(2);
      tester.m(m_val).n(n_val).k(16);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
16920 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // m in 1..2 with n=8, k=16; one iteration per value of m.
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(4).sr(2);
    tester.m(m_val).n(8).k(16);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
16936 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // n in 1..8 with m=2, k=16; one iteration per value of n.
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(4).sr(2);
    tester.m(2).n(n_val).k(16);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
16952 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // k in 1..15 with m=2, n=8.
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(4).sr(2);
    tester.m(2).n(8).k(k_val);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
16967 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k in 1..15 x n in 1..8 x m in 1..2; one iteration per combination.
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(4).sr(2);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
16987 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // k in 17..31 with m=2, n=8.
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(4).sr(2);
    tester.m(2).n(8).k(k_val);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
17002 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k in 17..31 x n in 1..8 x m in 1..2; one iteration per combination.
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(4).sr(2);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
17022 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // k = 32, 48, ..., 160 (step 16) with m=2, n=8.
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(4).sr(2);
    tester.m(2).n(8).k(k_val);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
17037 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 32, 48, ..., 160 x n in 1..8 x m in 1..2; one iteration per combination.
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(4).sr(2);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
17057 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // n in 9..15 x k in {1, 18, 35, 52, 69} with m=2.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(2);
      tester.m(2).n(n_val).k(k_val);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
17074 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n in 9..15 x k in {1, 18, 35, 52, 69} with m=2 and cn_stride set to 11.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(2);
      tester.m(2).n(n_val).k(k_val);
      tester.cn_stride(11);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
17092 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in 9..15 x k in {1, 18, 35, 52, 69} x m in 1..2; one iteration per combination.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(4).sr(2);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
17112 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} x k in {1, 18, 35, 52, 69} with m=2.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(2);
      tester.m(2).n(n_val).k(k_val);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
17129 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} x k in {1, 18, 35, 52, 69} with m=2 and cn_stride set to 11.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(2);
      tester.m(2).n(n_val).k(k_val);
      tester.cn_stride(11);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
17147 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} x k in {1, 18, 35, 52, 69} x m in 1..2; one iteration per combination.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(4).sr(2);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
17167 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // k in {1, 18, 35, 52, 69} with m=2, n=8 and ks set to 3.
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(4).sr(2);
    tester.m(2).n(8).k(k_val).ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
17183 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k in {1, 18, 35, 52, 69} x n in 1..8 x m in 1..2 with ks=3; one iteration each.
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(4).sr(2);
        tester.m(m_val).n(n_val).k(k_val).ks(3);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
17204 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n in 9..15 x k in {1, 18, 35, 52, 69} with m=2 and ks set to 3.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(2);
      tester.m(2).n(n_val).k(k_val).ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
17222 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} x k in {1, 18, 35, 52, 69} with m=2 and ks set to 3.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(2);
      tester.m(2).n(n_val).k(k_val).ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
17240 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k in {1, 18, 35, 52, 69} x n in 1..8 x m in 1..2 with cm_stride=11; one iteration each.
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(4).sr(2);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(11).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
17261 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // k in {1, 18, 35, 52, 69} with m=2, n=8, ks=3 and a_offset set to 163.
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(4).sr(2);
    tester.m(2).n(8).k(k_val).ks(3);
    tester.a_offset(163);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
  }
}
17278 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  // k in {1, 18, 35, 52, 69} x zero_index in {0, 1}; m=2, n=8, ks=3, a_offset=163.
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(2);
      tester.m(2).n(8).k(k_val).ks(3);
      tester.a_offset(163).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
17298 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single run: m=2, n=8, k=16 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(4).sr(2);
  tester.m(2).n(8).k(16);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
17312 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single run: m=2, n=8, k=16 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(4).sr(2);
  tester.m(2).n(8).k(16);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
17326 
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // Single run: m=2, n=8, k=16 with cm_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(4).sr(2);
  tester.m(2).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
    xnn_init_qc8_conv_minmax_fp32_neon_params,
    xnn_qs8_requantize_fp32);
}
17340 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
17341 
17342 
17343 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run: m=3, n=8, k=8.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(8);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
17356 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single run: m=3, n=8, k=8 with cn_stride set to 11.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
17370 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Every (n, m) with n in 1..8 and m in 1..3 at k=8; one iteration per combination.
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(m_val).n(n_val).k(8);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
17388 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m in 1..3 with n=8, k=8; one iteration per value of m.
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(m_val).n(8).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
17404 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in 1..8 with m=3, k=8; one iteration per value of n.
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(n_val).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
17420 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in 1..7 with m=3, n=8.
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
17435 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in 1..7 x n in 1..8 x m in 1..3; one iteration per combination.
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
17455 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in 9..15 with m=3, n=8.
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
17470 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in 9..15 x n in 1..8 x m in 1..3; one iteration per combination.
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
17490 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 16, 24, ..., 80 (step 8) with m=3, n=8.
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
17505 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 16, 24, ..., 80 x n in 1..8 x m in 1..3; one iteration per combination.
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
17525 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in 9..15 x k in {1, 10, 19, 28, 37} with m=3.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
17542 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in 9..15 x k in {1, 10, 19, 28, 37} with m=3 and cn_stride set to 11.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.cn_stride(11);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
17560 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in 9..15 x k in {1, 10, 19, 28, 37} x m in 1..3; one iteration per combination.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
17580 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in {16, 24} x k in {1, 10, 19, 28, 37} with m=3.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
17597 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in {16, 24} x k in {1, 10, 19, 28, 37} with m=3 and cn_stride set to 11.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.cn_stride(11);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
17615 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in {16, 24} x k in {1, 10, 19, 28, 37} x m in 1..3; one iteration per combination.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
17635 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in {1, 10, 19, 28, 37} with m=3, n=8 and ks set to 3.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(k_val).ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
17651 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in {1, 10, 19, 28, 37} x n in 1..8 x m in 1..3 with ks=3; one iteration each.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).ks(3);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
17672 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in 9..15 x k in {1, 10, 19, 28, 37} with m=3 and ks set to 3.
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val).ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
17690 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in {16, 24} x k in {1, 10, 19, 28, 37} with m=3 and ks set to 3.
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(n_val).k(k_val).ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
17708 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in {1, 10, 19, 28, 37} x n in 1..8 x m in 1..3 with cm_stride=11; one iteration each.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(11).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
17729 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  // a_offset(127) with ks(3) on a 3x8 tile, k in {1, 10, 19, 28, 37}.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(depth);
    tester.ks(3).a_offset(127);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
17746 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  // zero_index 0..2 combined with a_offset(127) and ks(3),
  // for k in {1, 10, 19, 28, 37} on a 3x8 tile.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(8).k(depth);
      tester.ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
17766 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single 3x8 tile, k = 8, with qmin(128).
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(8);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
17780 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single 3x8 tile, k = 8, with qmax(128).
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(8);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
17794 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single 3x8 tile, k = 8, with cm_stride(11).
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
17808 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
17809 
17810 
17811 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single 3x8 tile with k = 8.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(8);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
17824 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single 3x8 tile, k = 8, with cn_stride(11).
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
17838 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 8 over every (m, n) with n in 1..8 and m in 1..3; iterations(1) each.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
17856 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 8, n = 8, with m in 1..3; iterations(1) each.
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(rows).n(8).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
17872 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 8, m = 3, with n in 1..8; iterations(1) each.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
17888 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 3x8 tile with k in 1..7.
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(depth);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
17903 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in 1..7 over every (m, n) with n in 1..8 and m in 1..3; iterations(1) each.
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
17923 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 3x8 tile with k in 9..15.
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(depth);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
17938 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in 9..15 over every (m, n) with n in 1..8 and m in 1..3; iterations(1) each.
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
17958 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 3x8 tile with k stepping by 8 from 16 through 80.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(depth);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
17973 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k stepping by 8 from 16 through 80, over every (m, n) with n in 1..8
  // and m in 1..3; iterations(1) each.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
17993 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 3, n in 9..15 and k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
18010 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // cn_stride(11) with m = 3, n in 9..15 and k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
18028 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in 9..15, k in {1, 10, 19, 28, 37}, m in 1..3; iterations(1) each.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
18048 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 3, n in {16, 24} and k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
18065 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // cn_stride(11) with m = 3, n in {16, 24} and k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
18083 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in {16, 24}, k in {1, 10, 19, 28, 37}, m in 1..3; iterations(1) each.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
18103 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks(3) on a full 3x8 tile with k in {1, 10, 19, 28, 37}.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(depth);
    tester.ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
18119 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks(3) over every (m, n) with n in 1..8 and m in 1..3,
  // for k in {1, 10, 19, 28, 37}; iterations(1) each.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
18140 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks(3) with m = 3, n in 9..15 and k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
18158 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks(3) with m = 3, n in {16, 24} and k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(cols).k(depth).ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
18176 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // cm_stride(11) over every (m, n) with n in 1..8 and m in 1..3,
  // for k in {1, 10, 19, 28, 37}; iterations(1) each.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(11).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
18197 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  // a_offset(127) with ks(3) on a 3x8 tile, k in {1, 10, 19, 28, 37}.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(1).sr(1);
    tester.m(3).n(8).k(depth);
    tester.ks(3).a_offset(127);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
18214 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  // zero_index 0..2 combined with a_offset(127) and ks(3),
  // for k in {1, 10, 19, 28, 37} on a 3x8 tile.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(1).sr(1);
      tester.m(3).n(8).k(depth);
      tester.ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
18234 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single 3x8 tile, k = 8, with qmin(128).
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(8);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
18248 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single 3x8 tile, k = 8, with qmax(128).
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(8);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
18262 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single 3x8 tile, k = 8, with cm_stride(11).
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(1).sr(1);
  tester.m(3).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neonv8_mlal_lane_prfm,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
18276 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
18277 
18278 
18279 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single 4x8 tile with k = 8.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
18292 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single 4x8 tile, k = 8, with cn_stride(11).
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
18306 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 8 over every (m, n) with n in 1..8 and m in 1..4; iterations(1) each.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
18324 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 8, n = 8, with m in 1..4; iterations(1) each.
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(rows).n(8).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
18340 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 8, m = 4, with n in 1..8; iterations(1) each.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
18356 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 4x8 tile with k in 1..7.
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
18371 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in 1..7 over every (m, n) with n in 1..8 and m in 1..4; iterations(1) each.
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
18391 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 4x8 tile with k in 9..15.
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
18406 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in 9..15 over every (m, n) with n in 1..8 and m in 1..4; iterations(1) each.
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
18426 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 4x8 tile with k stepping by 8 from 16 through 80.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
18441 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k stepping by 8 from 16 through 80, over every (m, n) with n in 1..8
  // and m in 1..4; iterations(1) each.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
18461 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 4, n in 9..15 and k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
18478 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // cn_stride(11) with m = 4, n in 9..15 and k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
18496 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in 9..15, k in {1, 10, 19, 28, 37}, m in 1..4; iterations(1) each.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
18516 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m = 4, n in {16, 24} and k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
18533 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // cn_stride(11) with m = 4, n in {16, 24} and k in {1, 10, 19, 28, 37}.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
18551 
// n_div_8_subtile: for n in {16, 24}, k in {1, 10, 19, 28, 37} and m in 1..4, drives the
// 4x8 neonv8_mlal_lane_prfm IGEMM kernel with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18571 
// small_kernel: for k in {1, 10, 19, 28, 37}, drives the 4x8 neonv8_mlal_lane_prfm
// IGEMM kernel with m = 4, n = 8 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
18587 
// small_kernel_subtile: for k in {1, 10, 19, 28, 37}, n in 1..8 and m in 1..4, drives the
// 4x8 neonv8_mlal_lane_prfm IGEMM kernel with ks(3) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18608 
// n_gt_8_small_kernel: for n in 9..15 and k in {1, 10, 19, 28, 37}, drives the
// 4x8 neonv8_mlal_lane_prfm IGEMM kernel with m = 4 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18626 
// n_div_8_small_kernel: for n in {16, 24} and k in {1, 10, 19, 28, 37}, drives the
// 4x8 neonv8_mlal_lane_prfm IGEMM kernel with m = 4 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18644 
// strided_cm_subtile: for k in {1, 10, 19, 28, 37}, n in 1..8 and m in 1..4, drives the
// 4x8 neonv8_mlal_lane_prfm IGEMM kernel with cm_stride(11) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18665 
// a_offset: for k in {1, 10, 19, 28, 37}, drives the 4x8 neonv8_mlal_lane_prfm
// IGEMM kernel with m = 4, n = 8, ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_size)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
18682 
// zero: for k in {1, 10, 19, 28, 37} and each zero_index in 0..3, drives the
// 4x8 neonv8_mlal_lane_prfm IGEMM kernel with m = 4, n = 8, ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(k_size)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18702 
// qmin: drives the 4x8 neonv8_mlal_lane_prfm IGEMM kernel with m = 4, n = 8, k = 8
// and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
18716 
// qmax: drives the 4x8 neonv8_mlal_lane_prfm IGEMM kernel with m = 4, n = 8, k = 8
// and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
18730 
// strided_cm: drives the 4x8 neonv8_mlal_lane_prfm IGEMM kernel with m = 4, n = 8, k = 8
// and cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
18744 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
18745 
18746 
18747 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_8: drives the 4x16 neon_mlal_lane_prfm IGEMM kernel with m = 4, n = 16, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
18760 
// strided_cn: drives the 4x16 neon_mlal_lane_prfm IGEMM kernel with m = 4, n = 16, k = 8
// and cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
18774 
// k_eq_8_subtile: for n in 1..16 and m in 1..4, drives the 4x16 neon_mlal_lane_prfm
// IGEMM kernel with k = 8 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18792 
// k_eq_8_subtile_m: for m in 1..4, drives the 4x16 neon_mlal_lane_prfm IGEMM kernel
// with n = 16, k = 8 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(m_size).n(16).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
18808 
// k_eq_8_subtile_n: for n in 1..16, drives the 4x16 neon_mlal_lane_prfm IGEMM kernel
// with m = 4, k = 8 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
18824 
// k_lt_8: for k in 1..7, drives the 4x16 neon_mlal_lane_prfm IGEMM kernel
// with m = 4 and n = 16.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
18839 
// k_lt_8_subtile: for k in 1..7, n in 1..16 and m in 1..4, drives the
// 4x16 neon_mlal_lane_prfm IGEMM kernel with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18859 
// k_gt_8: for k in 9..15, drives the 4x16 neon_mlal_lane_prfm IGEMM kernel
// with m = 4 and n = 16.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
18874 
// k_gt_8_subtile: for k in 9..15, n in 1..16 and m in 1..4, drives the
// 4x16 neon_mlal_lane_prfm IGEMM kernel with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18894 
// k_div_8: for k in 16, 24, ..., 80, drives the 4x16 neon_mlal_lane_prfm IGEMM kernel
// with m = 4 and n = 16.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
18909 
// k_div_8_subtile: for k in 16, 24, ..., 80, n in 1..16 and m in 1..4, drives the
// 4x16 neon_mlal_lane_prfm IGEMM kernel with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18929 
// n_gt_16: for n in 17..31 and k in {1, 10, 19, 28, 37}, drives the
// 4x16 neon_mlal_lane_prfm IGEMM kernel with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18946 
// n_gt_16_strided_cn: for n in 17..31 and k in {1, 10, 19, 28, 37}, drives the
// 4x16 neon_mlal_lane_prfm IGEMM kernel with m = 4 and cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18964 
// n_gt_16_subtile: for n in 17..31, k in {1, 10, 19, 28, 37} and m in 1..4, drives the
// 4x16 neon_mlal_lane_prfm IGEMM kernel with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18984 
// n_div_16: for n in {32, 48} and k in {1, 10, 19, 28, 37}, drives the
// 4x16 neon_mlal_lane_prfm IGEMM kernel with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19001 
// n_div_16_strided_cn: for n in {32, 48} and k in {1, 10, 19, 28, 37}, drives the
// 4x16 neon_mlal_lane_prfm IGEMM kernel with m = 4 and cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19019 
// n_div_16_subtile: for n in {32, 48}, k in {1, 10, 19, 28, 37} and m in 1..4, drives the
// 4x16 neon_mlal_lane_prfm IGEMM kernel with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
19039 
// small_kernel: for k in {1, 10, 19, 28, 37}, drives the 4x16 neon_mlal_lane_prfm
// IGEMM kernel with m = 4, n = 16 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
19055 
// small_kernel_subtile: for k in {1, 10, 19, 28, 37}, n in 1..16 and m in 1..4, drives the
// 4x16 neon_mlal_lane_prfm IGEMM kernel with ks(3) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
19076 
// n_gt_16_small_kernel: for n in 17..31 and k in {1, 10, 19, 28, 37}, drives the
// 4x16 neon_mlal_lane_prfm IGEMM kernel with m = 4 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19094 
// n_div_16_small_kernel: for n in {32, 48} and k in {1, 10, 19, 28, 37}, drives the
// 4x16 neon_mlal_lane_prfm IGEMM kernel with m = 4 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19112 
// strided_cm_subtile: for k in {1, 10, 19, 28, 37}, n in 1..16 and m in 1..4, drives the
// 4x16 neon_mlal_lane_prfm IGEMM kernel with cm_stride(19) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
19133 
// a_offset: for k in {1, 10, 19, 28, 37}, drives the 4x16 neon_mlal_lane_prfm
// IGEMM kernel with m = 4, n = 16, ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_size)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
19150 
// zero: for k in {1, 10, 19, 28, 37} and each zero_index in 0..3, drives the
// 4x16 neon_mlal_lane_prfm IGEMM kernel with m = 4, n = 16, ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
          .m(4).n(16).k(k_size)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19170 
// qmin: drives the 4x16 neon_mlal_lane_prfm IGEMM kernel with m = 4, n = 16, k = 8
// and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
19184 
// qmax: drives the 4x16 neon_mlal_lane_prfm IGEMM kernel with m = 4, n = 16, k = 8
// and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
19198 
// strided_cm: drives the 4x16 neon_mlal_lane_prfm IGEMM kernel with m = 4, n = 16, k = 8
// and cm_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
19212 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
19213 
19214 
19215 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_8: drives the 6x8 neonv8_mlal_lane_prfm IGEMM kernel with m = 6, n = 8, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
19228 
// strided_cn: drives the 6x8 neonv8_mlal_lane_prfm IGEMM kernel with m = 6, n = 8, k = 8
// and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
19242 
// k_eq_8_subtile: for n in 1..8 and m in 1..6, drives the 6x8 neonv8_mlal_lane_prfm
// IGEMM kernel with k = 8 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(6).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19260 
// k_eq_8_subtile_m: for m in 1..6, drives the 6x8 neonv8_mlal_lane_prfm IGEMM kernel
// with n = 8, k = 8 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(m_size).n(8).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
19276 
// k_eq_8_subtile_n: for n in 1..8, drives the 6x8 neonv8_mlal_lane_prfm IGEMM kernel
// with m = 6, k = 8 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
19292 
// k_lt_8: for k in 1..7, drives the 6x8 neonv8_mlal_lane_prfm IGEMM kernel
// with m = 6 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
19307 
// k_lt_8_subtile: for k in 1..7, n in 1..8 and m in 1..6, drives the
// 6x8 neonv8_mlal_lane_prfm IGEMM kernel with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
19327 
// k_gt_8: for k in 9..15, drives the 6x8 neonv8_mlal_lane_prfm IGEMM kernel
// with m = 6 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
19342 
// k_gt_8_subtile: for k in 9..15, n in 1..8 and m in 1..6, drives the
// 6x8 neonv8_mlal_lane_prfm IGEMM kernel with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 6; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(6).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
19362 
// k_div_8: for k in 16, 24, ..., 80, drives the 6x8 neonv8_mlal_lane_prfm IGEMM kernel
// with m = 6 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
19377 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Every (m, n) up to 6x8 for k = 16, 24, ..., 80, one iteration each.
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19397 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in 9..15 with m fixed at 6; k steps 1, 10, 19, 28, 37.
  for (uint32_t nn = 9; nn <= 15; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nn).k(kk)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19414 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Same sweep as n_gt_8 (n in 9..15, k stepping by 9) with cn_stride(11).
  for (uint32_t nn = 9; nn <= 15; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nn).k(kk)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19432 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in 9..15, k stepping by 9, and every m in 1..6; one iteration each.
  for (uint32_t nn = 9; nn <= 15; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19452 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 16 and 24 with m fixed at 6; k steps 1, 10, 19, 28, 37.
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nn).k(kk)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19469 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Same sweep as n_div_8 (n = 16, 24; k stepping by 9) with cn_stride(11).
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nn).k(kk)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19487 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 16 and 24, k stepping by 9, and every m in 1..6; one iteration each.
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19507 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 6x8 tile with ks(3); k steps 1, 10, 19, 28, 37.
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kk)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19523 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // ks(3) across every (m, n) up to 6x8, k stepping by 9; one iteration each.
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19544 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in 9..15 with ks(3); m fixed at 6, k stepping by 9.
  for (uint32_t nn = 9; nn <= 15; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nn).k(kk)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19562 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 16 and 24 with ks(3); m fixed at 6, k stepping by 9.
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nn).k(kk)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19580 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // cm_stride(11) across every (m, n) up to 6x8, k stepping by 9; one iteration each.
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 6; ++mm) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19601 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Full 6x8 tile with ks(3) and a_offset(251); k steps 1, 10, 19, 28, 37.
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kk)
      .ks(3)
      .a_offset(251)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
19618 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  // As a_offset, additionally passing each zero_index in 0..5.
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t zi = 0; zi <= 5; ++zi) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kk)
        .ks(3)
        .a_offset(251)
        .zero_index(zi)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19638 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single full-tile run (m=6, n=8, k=8) with qmin(128).
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
19652 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single full-tile run (m=6, n=8, k=8) with qmax(128).
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
19666 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  // Single full-tile run (m=6, n=8, k=8) with cm_stride(11).
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
19680 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
19681 
19682 
19683 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  // Single full-tile run: m=3, n=4, k=8.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19696 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // Single full-tile run (m=3, n=4, k=8) with cn_stride(7).
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19710 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k fixed at 8; every (m, n) up to 3x4, one iteration each.
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 3; ++mm) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(mm).n(nn).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19728 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  // n=4 and k=8 fixed; m varies over 1..3, one iteration each.
  for (uint32_t mm = 1; mm <= 3; ++mm) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(mm).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19744 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  // m=3 and k=8 fixed; n varies over 1..4, one iteration each.
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(nn).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19760 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 3x4 tile; k is swept over 1..7.
  for (size_t kk = 1; kk <= 7; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kk)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19775 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // Every (m, n) up to 3x4 for each k in 1..7, one iteration each.
  for (size_t kk = 1; kk <= 7; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19795 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 3x4 tile; k is swept over 9..15.
  for (size_t kk = 9; kk <= 15; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kk)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19810 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // Every (m, n) up to 3x4 for each k in 9..15, one iteration each.
  for (size_t kk = 9; kk <= 15; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19830 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 3x4 tile; k takes the multiples of 8 from 16 through 80.
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kk)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19845 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // Every (m, n) up to 3x4 for k = 16, 24, ..., 80, one iteration each.
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19865 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  // n in 5..7 with m fixed at 3; k steps 1, 10, 19, 28, 37.
  for (uint32_t nn = 5; nn <= 7; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19882 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // Same sweep as n_gt_4 (n in 5..7, k stepping by 9) with cn_stride(7).
  for (uint32_t nn = 5; nn <= 7; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19900 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // n in 5..7, k stepping by 9, and every m in 1..3; one iteration each.
  for (uint32_t nn = 5; nn <= 7; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19920 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 with m fixed at 3; k steps 1, 10, 19, 28, 37.
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19937 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // Same sweep as n_div_4 (n = 8, 12; k stepping by 9) with cn_stride(7).
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19955 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12, k stepping by 9, and every m in 1..3; one iteration each.
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19975 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // Full 3x4 tile with ks(3); k steps 1, 10, 19, 28, 37.
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kk)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19991 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // ks(3) across every (m, n) up to 3x4, k stepping by 9; one iteration each.
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20012 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // n in 5..7 with ks(3); m fixed at 3, k stepping by 9.
  for (uint32_t nn = 5; nn <= 7; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20030 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 with ks(3); m fixed at 3, k stepping by 9.
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20048 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // cm_stride(7) across every (m, n) up to 3x4, k stepping by 9; one iteration each.
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20069 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  // Full 3x4 tile with ks(3) and a_offset(127); k steps 1, 10, 19, 28, 37.
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kk)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20086 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  // As a_offset, additionally passing each zero_index in 0..2.
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t zi = 0; zi <= 2; ++zi) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kk)
        .ks(3)
        .a_offset(127)
        .zero_index(zi)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20106 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Single full-tile run (m=3, n=4, k=8) with qmin(128).
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20120 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Single full-tile run (m=3, n=4, k=8) with qmax(128).
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20134 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  // Single full-tile run (m=3, n=4, k=8) with cm_stride(7).
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20148 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
20149 
20150 
20151 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single full-tile run: m=4, n=4, k=8.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
20164 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Single full-tile run (m=4, n=4, k=8) with cn_stride(7).
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
20178 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k fixed at 8; every (m, n) up to 4x4, one iteration each.
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 4; ++mm) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(mm).n(nn).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20196 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // n=4 and k=8 fixed; m varies over 1..4, one iteration each.
  for (uint32_t mm = 1; mm <= 4; ++mm) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(mm).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
20212 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // m=4 and k=8 fixed; n varies over 1..4, one iteration each.
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(nn).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
20228 
// Full 4x4 tile; sweeps k over 1..7 (every value below 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
20243 
// Sweeps k over 1..7 and every (m, n) with 1 <= m, n <= 4, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20263 
// Full 4x4 tile; sweeps k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
20278 
// Sweeps k over 9..15 and every (m, n) with 1 <= m, n <= 4, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20298 
// Full 4x4 tile; sweeps k over the multiples of 8 from 16 to 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
20313 
// Sweeps k over the multiples of 8 from 16 to 80 and every (m, n) with 1 <= m, n <= 4,
// with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20333 
// m = 4; sweeps n over 5..7 (above nr = 4) and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
20350 
// m = 4, cn_stride = 7; sweeps n over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
20368 
// Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over 1..4, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20388 
// m = 4; sweeps n over 8 and 12 (multiples of nr = 4) and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
20405 
// m = 4, cn_stride = 7; sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
20423 
// Sweeps n over 8 and 12, k over 1, 10, 19, 28, 37 and m over 1..4, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20443 
// Full 4x4 tile with ks = 3; sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
20459 
// ks = 3; sweeps k over 1, 10, 19, 28, 37 and every (m, n) with 1 <= m, n <= 4,
// with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20480 
// m = 4, ks = 3; sweeps n over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
20498 
// m = 4, ks = 3; sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
20516 
// cm_stride = 7; sweeps k over 1, 10, 19, 28, 37 and every (m, n) with 1 <= m, n <= 4,
// with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20537 
// Full 4x4 tile with ks = 3 and a_offset = 163; sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
20554 
// Full 4x4 tile with ks = 3 and a_offset = 163; sweeps k over 1, 10, 19, 28, 37 and
// zero_index over 0..3.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
20574 
// Full 4x4 tile with k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
20588 
// Full 4x4 tile with k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
20602 
// Full 4x4 tile with k = 8 and cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
20616 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
20617 
20618 
20619 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: full 1x4 tile (m = mr, n = nr) with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
20632 
// Full 1x4 tile with k = 8 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
20646 
// k = 8; sweeps n over 1..4 and m over 1..1 (a single value, since the loop bound is 1),
// with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20664 
// k = 8, n = 4; the m loop runs from 1 to 1, so it executes once with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20680 
// k = 8, m = 1; sweeps n over 1..4 with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20696 
// Full 1x4 tile; sweeps k over 1..7 (every value below 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20711 
// Sweeps k over 1..7, n over 1..4 and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20731 
// Full 1x4 tile; sweeps k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20746 
// Sweeps k over 9..15, n over 1..4 and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20766 
// Full 1x4 tile; sweeps k over the multiples of 8 from 16 to 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20781 
// Sweeps k over the multiples of 8 from 16 to 80, n over 1..4 and m over 1..1,
// with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20801 
// m = 1; sweeps n over 5..7 (above nr = 4) and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20818 
// m = 1, cn_stride = 7; sweeps n over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20836 
// Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20856 
// m = 1; sweeps n over 8 and 12 (multiples of nr = 4) and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20873 
// m = 1, cn_stride = 7; sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20891 
// Sweeps n over 8 and 12, k over 1, 10, 19, 28, 37 and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20911 
// Full 1x4 tile with ks = 3; sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20927 
// ks = 3; sweeps k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..1,
// with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20948 
// m = 1, ks = 3; sweeps n over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20966 
// m = 1, ks = 3; sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20984 
// cm_stride = 7; sweeps k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..1,
// with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21005 
// Full 1x4 tile with ks = 3 and a_offset = 43; sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21022 
// Full 1x4 tile with ks = 3 and a_offset = 43; sweeps k over 1, 10, 19, 28, 37.
// The zero_index loop runs for mz < 1, so only zero_index = 0 is exercised.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21042 
// Full 1x4 tile with k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21056 
// Full 1x4 tile with k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21070 
// Full 1x4 tile with k = 8 and cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21084 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21085 
21086 
21087 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 2, n = 4, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21100 
// Single run at m = 2, n = 4, k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21114 
// k = 8; runs every (n, m) pair with n in 1..4 and m in 1..2, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21132 
// k = 8, n = 4; sweeps m over 1..2 with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21148 
// k = 8, m = 2; sweeps n over 1..4 with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(nc).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21164 
// m = 2, n = 4; sweeps k over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21179 
// Sweeps k over 1..7, n over 1..4 and m over 1..2, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21199 
// m = 2, n = 4; sweeps k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21214 
// Sweeps k over 9..15, n over 1..4 and m over 1..2, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21234 
// m = 2, n = 4; sweeps k over 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21249 
// Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..2, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21269 
// m = 2; sweeps n over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21286 
// m = 2, cn_stride = 7; sweeps n over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21304 
// Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over 1..2, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21324 
// m = 2; runs n = 8 and n = 12, each with k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21341 
// m = 2, cn_stride = 7; runs n = 8 and n = 12, each with k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21359 
// Runs n = 8 and n = 12 with k over 1, 10, 19, 28, 37 and m over 1..2, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21379 
// m = 2, n = 4, ks = 3; sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21395 
// ks = 3; sweeps k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..2, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21416 
// m = 2, ks = 3; sweeps n over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21434 
// m = 2, ks = 3; runs n = 8 and n = 12, each with k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21452 
// cm_stride = 7; sweeps k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..2, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21473 
// m = 2, n = 4, ks = 3, a_offset = 83; sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3).a_offset(83);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21490 
// m = 2, n = 4, ks = 3, a_offset = 83; sweeps k over 1, 10, 19, 28, 37 and zero_index over 0..1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(4).k(kc);
      tester.ks(3).a_offset(83).zero_index(zi);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21510 
// Single run at m = 2, n = 4, k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21524 
// Single run at m = 2, n = 4, k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21538 
// Single run at m = 2, n = 4, k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21552 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21553 
21554 
21555 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 4, n = 4, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21568 
// Single run at m = 4, n = 4, k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21582 
// k = 8; runs every (n, m) pair with n in 1..4 and m in 1..4, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21600 
// k = 8, n = 4; sweeps m over 1..4 with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(mc).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21616 
// k = 8, m = 4; sweeps n over 1..4 with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(nc).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21632 
// m = 4, n = 4; sweeps k over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21647 
// Sweeps k over 1..7, n over 1..4 and m over 1..4, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21667 
// m = 4, n = 4; sweeps k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21682 
// Sweeps k over 9..15, n over 1..4 and m over 1..4, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21702 
// m = 4, n = 4; sweeps k over 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21717 
// Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..4, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21737 
// m = 4; sweeps n over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21754 
// m = 4, cn_stride = 7; sweeps n over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21772 
// Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over 1..4, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21792 
// m = 4; runs n = 8 and n = 12, each with k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21809 
// m = 4, cn_stride = 7; runs n = 8 and n = 12, each with k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21827 
// Runs n = 8 and n = 12 with k over 1, 10, 19, 28, 37 and m over 1..4, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21847 
// m = 4, n = 4, ks = 3; sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21863 
// ks = 3; sweeps k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..4, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21884 
// m = 4, ks = 3; sweeps n over 5..7 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21902 
// m = 4, ks = 3; runs n = 8 and n = 12, each with k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(nc).k(kc);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21920 
// cm_stride = 7; sweeps k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..4, iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21941 
// m = 4, n = 4, ks = 3, a_offset = 163; sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(kc);
    tester.ks(3).a_offset(163);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21958 
// 4x4c2 AVX LD64 kernel: full 4x4 tile, k in {1, 10, 19, 28, 37}, ks = 3,
// a_offset = 163, with zero_index swept over 0..3.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21978 
// 4x4c2 AVX LD64 kernel: single 4x4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21992 
// 4x4c2 AVX LD64 kernel: single 4x4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22006 
// 4x4c2 AVX LD64 kernel: single 4x4 tile at k = 8 with cm_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22020 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22021 
22022 
22023 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c2 SSE2 LD128 kernel: single 2x4 tile at k = 8, all other settings default.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22036 
// 2x4c2 SSE2 LD128 kernel: single 2x4 tile at k = 8 with cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22050 
// 2x4c2 SSE2 LD128 kernel: k = 8, every (m, n) in [1,2] x [1,4], one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22068 
// 2x4c2 SSE2 LD128 kernel: k = 8, n = 4, m swept over 1..2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22084 
// 2x4c2 SSE2 LD128 kernel: k = 8, m = 2, n swept over 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(n_val).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22100 
// 2x4c2 SSE2 LD128 kernel: full 2x4 tile with k swept over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22115 
// 2x4c2 SSE2 LD128 kernel: k in 1..7 crossed with every (m, n) in
// [1,2] x [1,4], one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22135 
// 2x4c2 SSE2 LD128 kernel: full 2x4 tile with k swept over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22150 
// 2x4c2 SSE2 LD128 kernel: k in 9..15 crossed with every (m, n) in
// [1,2] x [1,4], one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22170 
// 2x4c2 SSE2 LD128 kernel: full 2x4 tile with k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22185 
// 2x4c2 SSE2 LD128 kernel: k = 16, 24, ..., 80 crossed with every (m, n) in
// [1,2] x [1,4], one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22205 
// 2x4c2 SSE2 LD128 kernel: m = 2, n in 5..7, k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22222 
// 2x4c2 SSE2 LD128 kernel: m = 2, n in 5..7, k in {1, 10, 19, 28, 37},
// cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22240 
// 2x4c2 SSE2 LD128 kernel: n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..2,
// one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22260 
// 2x4c2 SSE2 LD128 kernel: m = 2, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22277 
// 2x4c2 SSE2 LD128 kernel: m = 2, n in {8, 12}, k in {1, 10, 19, 28, 37},
// cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22295 
// 2x4c2 SSE2 LD128 kernel: n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..2,
// one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22315 
// 2x4c2 SSE2 LD128 kernel: full 2x4 tile, k in {1, 10, 19, 28, 37}, ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22331 
// 2x4c2 SSE2 LD128 kernel: k in {1, 10, 19, 28, 37} crossed with every (m, n)
// in [1,2] x [1,4], ks = 3, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22352 
// 2x4c2 SSE2 LD128 kernel: m = 2, n in 5..7, k in {1, 10, 19, 28, 37}, ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22370 
// 2x4c2 SSE2 LD128 kernel: m = 2, n in {8, 12}, k in {1, 10, 19, 28, 37}, ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22388 
// 2x4c2 SSE2 LD128 kernel: k in {1, 10, 19, 28, 37} crossed with every (m, n)
// in [1,2] x [1,4], cm_stride = 7, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22409 
// 2x4c2 SSE2 LD128 kernel: full 2x4 tile, k in {1, 10, 19, 28, 37}, ks = 3,
// a_offset = 83.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22426 
// 2x4c2 SSE2 LD128 kernel: full 2x4 tile, k in {1, 10, 19, 28, 37}, ks = 3,
// a_offset = 83, with zero_index swept over 0..1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_val)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22446 
// 2x4c2 SSE2 LD128 kernel: single 2x4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22460 
// 2x4c2 SSE2 LD128 kernel: single 2x4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22474 
// 2x4c2 SSE2 LD128 kernel: single 2x4 tile at k = 8 with cm_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22488 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22489 
22490 
22491 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c2 SSE4.1 LD128 kernel: single 2x4 tile at k = 8, all other settings default.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22504 
// 2x4c2 SSE4.1 LD128 kernel: single 2x4 tile at k = 8 with cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22518 
// 2x4c2 SSE4.1 LD128 kernel: k = 8, every (m, n) in [1,2] x [1,4], one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22536 
// 2x4c2 SSE4.1 LD128 kernel: k = 8, n = 4, m swept over 1..2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22552 
// 2x4c2 SSE4.1 LD128 kernel: k = 8, m = 2, n swept over 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(n_val).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22568 
// 2x4c2 SSE4.1 LD128 kernel: full 2x4 tile with k swept over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22583 
// 2x4c2 SSE4.1 LD128 kernel: k in 1..7 crossed with every (m, n) in
// [1,2] x [1,4], one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22603 
// 2x4c2 SSE4.1 LD128 kernel: full 2x4 tile with k swept over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22618 
// 2x4c2 SSE4.1 LD128 kernel: k in 9..15 crossed with every (m, n) in
// [1,2] x [1,4], one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22638 
// 2x4c2 SSE4.1 LD128 kernel: full 2x4 tile with k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22653 
// 2x4c2 SSE4.1 LD128 kernel: k = 16, 24, ..., 80 crossed with every (m, n) in
// [1,2] x [1,4], one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22673 
// 2x4c2 SSE4.1 LD128 kernel: m = 2, n in 5..7, k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22690 
// 2x4c2 SSE4.1 LD128 kernel: m = 2, n in 5..7, k in {1, 10, 19, 28, 37},
// cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22708 
// 2x4c2 SSE4.1 LD128 kernel: n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..2,
// one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22728 
// 2x4c2 SSE4.1 LD128 kernel: m = 2, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22745 
// 2x4c2 SSE4.1 LD128 kernel: m = 2, n in {8, 12}, k in {1, 10, 19, 28, 37},
// cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22763 
// 2x4c2 SSE4.1 LD128 kernel: n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..2,
// one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22783 
// 2x4c2 SSE4.1 LD128 kernel: full 2x4 tile, k in {1, 10, 19, 28, 37}, ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
22799 
// 2x4c2 SSE4.1 LD128 kernel: k in {1, 10, 19, 28, 37} crossed with every
// (m, n) in [1,2] x [1,4], ks = 3, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22820 
// ks(3) with n in 5..7 and k in {1, 10, 19, 28, 37}; m is fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22838 
// ks(3) with n in {8, 12} and k in {1, 10, 19, 28, 37}; m is fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22856 
// cm_stride(7) with every m in 1..2 and n in 1..4, for k in {1, 10, 19, 28, 37};
// a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22877 
// Full 2x4 tile with ks(3) and a_offset(83); k takes {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22894 
// Same setup as the a_offset case, additionally passing each zero_index in
// {0, 1}; k takes {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_val)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22914 
// Full 2x4 tile at k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22928 
// Full 2x4 tile at k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22942 
// Full 2x4 tile at k = 8 with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22956 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22957 
22958 
22959 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 1x4 tile at k = 8 with no extra options.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22972 
// Full 1x4 tile at k = 8 with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22986 
// k = 8 with every n in 1..4 and m in 1..1; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23004 
// k = 8, n = 4, with m in 1..1; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23020 
// k = 8, m = 1, with n in 1..4; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(n_val).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23036 
// Full 1x4 tile for each k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23051 
// Each k in 1..7 with every n in 1..4 and m in 1..1; a single iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23071 
// Full 1x4 tile for each k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23086 
// Each k in 9..15 with every n in 1..4 and m in 1..1; a single iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23106 
// Full 1x4 tile for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23121 
// k = 16, 24, ..., 80 with every n in 1..4 and m in 1..1; a single iteration
// per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23141 
// n in 5..7 and k in {1, 10, 19, 28, 37}; m is fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23158 
// cn_stride(7) with n in 5..7 and k in {1, 10, 19, 28, 37}; m is fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23176 
// n in 5..7, k in {1, 10, 19, 28, 37} and m in 1..1; a single iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23196 
// n in {8, 12} and k in {1, 10, 19, 28, 37}; m is fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23213 
// cn_stride(7) with n in {8, 12} and k in {1, 10, 19, 28, 37}; m is fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23231 
// n in {8, 12}, k in {1, 10, 19, 28, 37} and m in 1..1; a single iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23251 
// Full 1x4 tile with ks(3); k takes the values {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_val)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23267 
// ks(3) with every m in 1..1 and n in 1..4, for k in {1, 10, 19, 28, 37};
// a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23288 
// ks(3) with n in 5..7 and k in {1, 10, 19, 28, 37}; m is fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23306 
// ks(3) with n in {8, 12} and k in {1, 10, 19, 28, 37}; m is fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23324 
// cm_stride(7) with every m in 1..1 and n in 1..4, for k in {1, 10, 19, 28, 37};
// a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23345 
// Full 1x4 tile with ks(3) and a_offset(43); k takes {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(k_val)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23362 
// Same setup as the a_offset case, additionally passing zero_index 0 (the only
// value the loop visits); k takes {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_val)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23382 
// Full 1x4 tile at k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23396 
// Full 1x4 tile at k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23410 
// Full 1x4 tile at k = 8 with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23424 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23425 
23426 
23427 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 2x4 tile at k = 8 with no extra options.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23440 
// Full 2x4 tile at k = 8 with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23454 
// k = 8 with every n in 1..4 and m in 1..2; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23472 
// k = 8, n = 4, with m in 1..2; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23488 
// k = 8, m = 2, with n in 1..4; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(n_val).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23504 
// Full 2x4 tile for each k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23519 
// Each k in 1..7 with every n in 1..4 and m in 1..2; a single iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23539 
// Full 2x4 tile for each k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23554 
// Each k in 9..15 with every n in 1..4 and m in 1..2; a single iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23574 
// Full 2x4 tile for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23589 
// k = 16, 24, ..., 80 with every n in 1..4 and m in 1..2; a single iteration
// per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23609 
// n in 5..7 and k in {1, 10, 19, 28, 37}; m is fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23626 
// cn_stride(7) with n in 5..7 and k in {1, 10, 19, 28, 37}; m is fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23644 
// n in 5..7, k in {1, 10, 19, 28, 37} and m in 1..2; a single iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23664 
// n in {8, 12} and k in {1, 10, 19, 28, 37}; m is fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23681 
// cn_stride(7) with n in {8, 12} and k in {1, 10, 19, 28, 37}; m is fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23699 
// 2x4c2 AVX (ld128) kernel: n = 8 and 12, k = 1, 10, 19, 28, 37, and every m in 1..2;
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23719 
// 2x4c2 AVX (ld128) kernel: full 2x4 tile with ks = 3, k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23735 
// 2x4c2 AVX (ld128) kernel: ks = 3 across k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..2;
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23756 
// 2x4c2 AVX (ld128) kernel: ks = 3 with n = 5..7 (greater than nr = 4),
// k = 1, 10, 19, 28, 37, and m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23774 
// 2x4c2 AVX (ld128) kernel: ks = 3 with n = 8 and 12 (multiples of nr = 4),
// k = 1, 10, 19, 28, 37, and m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23792 
// 2x4c2 AVX (ld128) kernel: cm_stride = 7 across k = 1, 10, 19, 28, 37, n = 1..4 and
// m = 1..2; each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23813 
// 2x4c2 AVX (ld128) kernel: full 2x4 tile with ks = 3 and a_offset = 83,
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23830 
// 2x4c2 AVX (ld128) kernel: full 2x4 tile with ks = 3 and a_offset = 83,
// k = 1, 10, 19, 28, 37, sweeping zero_index over 0..1 (one value per row of mr = 2).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23850 
// 2x4c2 AVX (ld128) kernel: full 2x4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23864 
// 2x4c2 AVX (ld128) kernel: full 2x4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23878 
// 2x4c2 AVX (ld128) kernel: full 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23892 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23893 
23894 
23895 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c2 AVX (ld128) kernel: full 3x4 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23908 
// 3x4c2 AVX (ld128) kernel: full 3x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23922 
// 3x4c2 AVX (ld128) kernel: k = 8 over every n in 1..4 and m in 1..3;
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23940 
// 3x4c2 AVX (ld128) kernel: k = 8, n = 4, sweeping m over 1..3;
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23956 
// 3x4c2 AVX (ld128) kernel: k = 8, m = 3, sweeping n over 1..4;
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23972 
// 3x4c2 AVX (ld128) kernel: full 3x4 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23987 
// 3x4c2 AVX (ld128) kernel: every k in 1..7, n in 1..4 and m in 1..3;
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24007 
// 3x4c2 AVX (ld128) kernel: full 3x4 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24022 
// 3x4c2 AVX (ld128) kernel: every k in 9..15, n in 1..4 and m in 1..3;
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24042 
// 3x4c2 AVX (ld128) kernel: full 3x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24057 
// 3x4c2 AVX (ld128) kernel: k = 16, 24, ..., 80 over every n in 1..4 and m in 1..3;
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24077 
// 3x4c2 AVX (ld128) kernel: n = 5..7 (greater than nr = 4), k = 1, 10, 19, 28, 37,
// with the full m = 3 rows.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24094 
// 3x4c2 AVX (ld128) kernel: n = 5..7, k = 1, 10, 19, 28, 37, m = 3,
// with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24112 
// 3x4c2 AVX (ld128) kernel: n = 5..7, k = 1, 10, 19, 28, 37, and every m in 1..3;
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24132 
// 3x4c2 AVX (ld128) kernel: n = 8 and 12 (multiples of nr = 4), k = 1, 10, 19, 28, 37,
// with the full m = 3 rows.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24149 
// 3x4c2 AVX (ld128) kernel: n = 8 and 12, k = 1, 10, 19, 28, 37, m = 3,
// with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24167 
// 3x4c2 AVX (ld128) kernel: n = 8 and 12, k = 1, 10, 19, 28, 37, and every m in 1..3;
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24187 
// 3x4c2 AVX (ld128) kernel: full 3x4 tile with ks = 3, k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24203 
// 3x4c2 AVX (ld128) kernel: ks = 3 across k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..3;
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24224 
// 3x4c2 AVX (ld128) kernel: ks = 3 with n = 5..7 (greater than nr = 4),
// k = 1, 10, 19, 28, 37, and m = 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24242 
// 3x4c2 AVX (ld128) kernel: ks = 3 with n = 8 and 12 (multiples of nr = 4),
// k = 1, 10, 19, 28, 37, and m = 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24260 
// 3x4c2 AVX (ld128) kernel: cm_stride = 7 across k = 1, 10, 19, 28, 37, n = 1..4 and
// m = 1..3; each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24281 
// 3x4c2 AVX (ld128) kernel: full 3x4 tile with ks = 3 and a_offset = 127,
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24298 
// 3x4c2 AVX (ld128) kernel: full 3x4 tile with ks = 3 and a_offset = 127,
// k = 1, 10, 19, 28, 37, sweeping zero_index over 0..2 (one value per row of mr = 3).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24318 
// 3x4c2 AVX (ld128) kernel: full 3x4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
24332 
// 3x4c2 AVX (ld128) kernel: full 3x4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
24346 
// 3x4c2 AVX (ld128) kernel: full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
24360 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
24361 
24362 
24363 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c2s4 SSE4.1 (ld64) kernel: full 2x4 tile at k = 8 (sr = 4).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
24376 
// 2x4c2s4 SSE4.1 (ld64) kernel: full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
24390 
// 2x4c2s4 SSE4.1 (ld64) kernel: k = 8 over every n in 1..4 and m in 1..2;
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24408 
// 2x4c2s4 SSE4.1 (ld64) kernel: k = 8, n = 4, sweeping m over 1..2;
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24424 
// 2x4c2s4 SSE4.1 (ld64) kernel: k = 8, m = 2, sweeping n over 1..4;
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24440 
// 2x4c2s4 SSE4.1 (ld64) kernel: full 2x4 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24455 
// 2x4c2s4 SSE4.1 (ld64) kernel: every k in 1..7, n in 1..4 and m in 1..2;
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24475 
// 2x4c2s4 SSE4.1 (ld64) kernel: full 2x4 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24490 
// 2x4c2s4 SSE4.1 (ld64) kernel: every k in 9..15, n in 1..4 and m in 1..2;
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24510 
// 2x4c2s4 SSE4.1 (ld64) kernel: full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24525 
// 2x4c2s4 SSE4.1 (ld64) kernel: k = 16, 24, ..., 80 over every n in 1..4 and m in 1..2;
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24545 
// 2x4c2s4 SSE4.1 (ld64) kernel: n = 5..7 (greater than nr = 4), k = 1, 10, 19, 28, 37,
// with the full m = 2 rows.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24562 
// Same sweep as n_gt_4 (n = 5..7, k = 1, 10, ..., 37, m=2) but with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24580 
// For n = 5..7 and k = 1, 10, ..., 37, runs each m in 1..2 with a single
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24600 
// Runs n = 8 and n = 12 (multiples of nr=4) with m=2 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24617 
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, ..., 37; m=2) but with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24635 
// For n = 8, 12 and k = 1, 10, ..., 37, runs each m in 1..2 with a single
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24655 
// Runs m=2, n=4 with ks set to 3 for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_size).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24671 
// With ks set to 3, runs every (m, n) with m in 1..2 and n in 1..4 for
// k = 1, 10, ..., 37, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24692 
// Runs n = 5..7 with m=2 and ks set to 3 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24710 
// Runs n = 8, 12 with m=2 and ks set to 3 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24728 
// With cm_stride set to 7, runs every (m, n) with m in 1..2 and n in 1..4
// for k = 1, 10, ..., 37, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24749 
// Runs m=2, n=4 with ks set to 3 and a_offset set to 83 for
// k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_size).ks(3).a_offset(83);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24766 
// For k = 1, 10, ..., 37, runs m=2, n=4 with ks=3 and a_offset=83 once for
// each zero_index value 0 and 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(4).k(k_size).ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24786 
// Runs a single m=2, n=4, k=8 configuration with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24800 
// Runs a single m=2, n=4, k=8 configuration with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24814 
// Runs a single m=2, n=4, k=8 configuration with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24828 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
24829 
24830 
24831 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 4x4c2s4 SSE2 kernel once with m=4, n=4, k=8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24844 
// Runs a single m=4, n=4, k=8 configuration with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24858 
// With k=8, runs every (m, n) with m in 1..4 and n in 1..4, using a single
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(m_size).n(n_size).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24876 
// With n=4 and k=8, runs each m in 1..4 with a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(m_size).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24892 
// With m=4 and k=8, runs each n in 1..4 with a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(n_size).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24908 
// Runs m=4, n=4 for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24923 
// For every k in 1..7, runs every (m, n) with m in 1..4 and n in 1..4,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24943 
// Runs m=4, n=4 for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24958 
// For every k in 9..15, runs every (m, n) with m in 1..4 and n in 1..4,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24978 
// Runs m=4, n=4 for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24993 
// For k = 16, 24, ..., 80, runs every (m, n) with m in 1..4 and n in 1..4,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25013 
// Runs n = 5, 6, 7 (above nr=4) with m=4 for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25030 
// Same sweep as n_gt_4 (n = 5..7, k = 1, 10, ..., 37, m=4) but with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25048 
// For n = 5..7 and k = 1, 10, ..., 37, runs each m in 1..4 with a single
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25068 
// Runs n = 8 and n = 12 (multiples of nr=4) with m=4 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25085 
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, ..., 37; m=4) but with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25103 
// For n = 8, 12 and k = 1, 10, ..., 37, runs each m in 1..4 with a single
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25123 
// Runs m=4, n=4 with ks set to 3 for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
25139 
// With ks set to 3, runs every (m, n) with m in 1..4 and n in 1..4 for
// k = 1, 10, ..., 37, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25160 
// Runs n = 5..7 with m=4 and ks set to 3 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25178 
// Runs n = 8, 12 with m=4 and ks set to 3 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25196 
// With cm_stride set to 7, runs every (m, n) with m in 1..4 and n in 1..4
// for k = 1, 10, ..., 37, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25217 
// Runs m=4, n=4 with ks set to 3 and a_offset set to 163 for
// k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size).ks(3).a_offset(163);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
25234 
// For k = 1, 10, ..., 37, runs m=4, n=4 with ks=3 and a_offset=163 once for
// each zero_index value 0..3.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(4).k(k_size).ks(3).a_offset(163).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25254 
// Runs a single m=4, n=4, k=8 configuration with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
25268 
// Runs a single m=4, n=4, k=8 configuration with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
25282 
// Runs a single m=4, n=4, k=8 configuration with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
25296 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
25297 
25298 
25299 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 4x4c2s4 SSE4.1 kernel once with m=4, n=4, k=8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25312 
// Runs a single m=4, n=4, k=8 configuration with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25326 
// With k=8, runs every (m, n) with m in 1..4 and n in 1..4, using a single
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(m_size).n(n_size).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25344 
// With n=4 and k=8, runs each m in 1..4 with a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(m_size).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25360 
// With m=4 and k=8, runs each n in 1..4 with a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(n_size).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25376 
// Runs m=4, n=4 for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25391 
// For every k in 1..7, runs every (m, n) with m in 1..4 and n in 1..4,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25411 
// Full 4x4 output; k sweeps 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25426 
// k sweeps 9..15; for each k every (n, m) in 1..4 x 1..4 is run once.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25446 
// Full 4x4 output; k takes the multiples of 8 from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25461 
// k takes multiples of 8 in 16..80; every (n, m) in 1..4 x 1..4 is run once.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25481 
// n sweeps 5..7 with all 4 rows; k steps 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25498 
// Same sweep as n_gt_4 (n in 5..7, k = 1, 10, ..., 37) with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25516 
// n in 5..7, k = 1, 10, ..., 37, m in 1..4; each shape is run once.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25536 
// n takes 8 and 12 with all 4 rows; k steps 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25553 
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, ..., 37) with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25571 
// n = 8, 12; k = 1, 10, ..., 37; m in 1..4; each shape is run once.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25591 
// Full 4x4 output with ks(3); k steps 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25607 
// ks(3) with k = 1, 10, ..., 37; every (n, m) in 1..4 x 1..4 is run once.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25628 
// n in 5..7 with all 4 rows and ks(3); k steps 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25646 
// n = 8, 12 with all 4 rows and ks(3); k steps 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25664 
// cm_stride(7) with k = 1, 10, ..., 37; every (n, m) in 1..4 x 1..4 is run once.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25685 
// Full 4x4 output with ks(3) and a_offset(163); k steps 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25702 
// ks(3) and a_offset(163); for each k in 1, 10, ..., 37 zero_index sweeps 0..3.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zi)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25722 
// Single 4x4 run at k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25736 
// Single 4x4 run at k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25750 
// Single 4x4 run at k = 8 with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25764 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
25765 
25766 
25767 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 1x4 run at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25780 
// Single 1x4 run at k = 8 with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25794 
// k fixed at 8; n sweeps 1..4 and m sweeps 1..1, one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25812 
// k fixed at 8 with n = 4; m sweeps 1..1, one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25828 
// k fixed at 8 with m = 1; n sweeps 1..4, one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25844 
// Full 1x4 output; k sweeps 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25859 
// k sweeps 1..7; for each k, n sweeps 1..4 and m sweeps 1..1, run once each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25879 
// Full 1x4 output; k sweeps 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25894 
// k sweeps 9..15; for each k, n sweeps 1..4 and m sweeps 1..1, run once each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25914 
// Full 1x4 output; k takes the multiples of 8 from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25929 
// k takes multiples of 8 in 16..80; n sweeps 1..4 and m sweeps 1..1, run once each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25949 
// n sweeps 5..7 with m = 1; k steps 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25966 
// Same sweep as n_gt_4 (n in 5..7, k = 1, 10, ..., 37) with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25984 
// n in 5..7, k = 1, 10, ..., 37, m in 1..1; each shape is run once.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26004 
// n takes 8 and 12 with m = 1; k steps 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26021 
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, ..., 37) with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26039 
// n = 8, 12; k = 1, 10, ..., 37; m in 1..1; each shape is run once.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26059 
// Full 1x4 output with ks(3); k steps 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
26075 
// ks(3) with k = 1, 10, ..., 37; n sweeps 1..4 and m sweeps 1..1, run once each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26096 
// n in 5..7 with m = 1 and ks(3); k steps 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26114 
// n = 8, 12 with m = 1 and ks(3); k steps 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26132 
// cm_stride(7) with k = 1, 10, ..., 37; n sweeps 1..4 and m sweeps 1..1, run once each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26153 
// Full 1x4 output with ks(3) and a_offset(43); k steps 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
26170 
// ks(3) and a_offset(43); for each k in 1, 10, ..., 37 zero_index sweeps 0..0.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4)
          .m(1).n(4).k(kc)
          .ks(3)
          .a_offset(43)
          .zero_index(zi)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26190 
// Single 1x4 run at k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
26204 
// Single 1x4 run at k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
26218 
// Single 1x4 run at k = 8 with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
26232 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
26233 
26234 
26235 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 2x4 run at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
26248 
// Single 2x4 run at k = 8 with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
26262 
// k fixed at 8; n sweeps 1..4 and m sweeps 1..2, one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26280 
// Varies only m (1..2) while n=4 and k=8 stay fixed; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26296 
// Varies only n (1..4) while m=2 and k=8 stay fixed; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(n_dim).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26312 
// Runs the full m=2, n=4 configuration for each k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_dim)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26327 
// For each k in 1..7, runs every (n, m) pair with n in 1..4 and m in 1..2,
// using iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26347 
// Runs the full m=2, n=4 configuration for each k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_dim)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26362 
// For each k in 9..15, runs every (n, m) pair with n in 1..4 and m in 1..2,
// using iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26382 
// Runs the full m=2, n=4 configuration for k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim < 81; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_dim)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26397 
// For k = 16, 24, ..., 80, runs every (n, m) pair with n in 1..4 and
// m in 1..2, using iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim < 81; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26417 
// For each n in 5..7 and k = 1, 10, 19, 28, 37, runs the tester with m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_dim).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26434 
// For each n in 5..7 and k = 1, 10, 19, 28, 37, runs the tester with m=2
// and cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26452 
// For each n in 5..7, k = 1, 10, 19, 28, 37 and m in 1..2, runs the tester
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26472 
// For n = 8 and 12 and k = 1, 10, 19, 28, 37, runs the tester with m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim < 13; n_dim += 4) {
    for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_dim).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26489 
// For n = 8 and 12 and k = 1, 10, 19, 28, 37, runs the tester with m=2
// and cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim < 13; n_dim += 4) {
    for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26507 
// For n = 8 and 12, k = 1, 10, 19, 28, 37 and m in 1..2, runs the tester
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim < 13; n_dim += 4) {
    for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26527 
// For k = 1, 10, 19, 28, 37, runs the m=2, n=4 configuration with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_dim)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26543 
// For k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..2, runs the tester with
// ks(3) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26564 
// For each n in 5..7 and k = 1, 10, 19, 28, 37, runs the tester with m=2
// and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26582 
// For n = 8 and 12 and k = 1, 10, 19, 28, 37, runs the tester with m=2
// and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim < 13; n_dim += 4) {
    for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26600 
// For k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..2, runs the tester with
// cm_stride(7) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26621 
// For k = 1, 10, 19, 28, 37, runs the m=2, n=4 configuration with ks(3)
// and a_offset(83).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_dim)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26638 
// For k = 1, 10, 19, 28, 37 and zero_index in 0..1, runs the m=2, n=4
// configuration with ks(3) and a_offset(83).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_dim)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26658 
// One run at m=2, n=4, k=8 with qmin(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26672 
// One run at m=2, n=4, k=8 with qmax(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26686 
// One run at m=2, n=4, k=8 with cm_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26700 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
26701 
26702 
26703 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run of the 3x4c2s4 AVX ld64 kernel at m=3, n=4, k=8; no other tester
// options are set here. Gated by TEST_REQUIRES_X86_AVX.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26716 
// One run at m=3, n=4, k=8 with cn_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26730 
// Runs the tester for every (n, m) pair with n in 1..4 (outer loop) and
// m in 1..3 (inner loop) at k=8, with iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26748 
// Varies only m (1..3) while n=4 and k=8 stay fixed; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26764 
// Varies only n (1..4) while m=3 and k=8 stay fixed; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(n_dim).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26780 
// Runs the full m=3, n=4 configuration for each k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_dim)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26795 
// For each k in 1..7, runs every (n, m) pair with n in 1..4 and m in 1..3,
// using iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26815 
// Runs the full m=3, n=4 configuration for each k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_dim)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26830 
// For each k in 9..15, runs every (n, m) pair with n in 1..4 and m in 1..3,
// using iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26850 
// Runs the full m=3, n=4 configuration for k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim < 81; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_dim)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26865 
// For k = 16, 24, ..., 80, runs every (n, m) pair with n in 1..4 and
// m in 1..3, using iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim < 81; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26885 
// For each n in 5..7 and k = 1, 10, 19, 28, 37, runs the tester with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_dim).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26902 
// For each n in 5..7 and k = 1, 10, 19, 28, 37, runs the tester with m=3
// and cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26920 
// For each n in 5..7, k = 1, 10, 19, 28, 37 and m in 1..3, runs the tester
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26940 
// For n = 8 and 12 and k = 1, 10, 19, 28, 37, runs the tester with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim < 13; n_dim += 4) {
    for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_dim).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26957 
// For n = 8 and 12 and k = 1, 10, 19, 28, 37, runs the tester with m=3
// and cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim < 13; n_dim += 4) {
    for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26975 
// For n = 8 and 12, k = 1, 10, 19, 28, 37 and m in 1..3, runs the tester
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim < 13; n_dim += 4) {
    for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26995 
// For k = 1, 10, 19, 28, 37, runs the m=3, n=4 configuration with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_dim)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27011 
// For k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..3, runs the tester with
// ks(3) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27032 
// For each n in 5..7 and k = 1, 10, 19, 28, 37, runs the tester with m=3
// and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27050 
// For n = 8 and 12 and k = 1, 10, 19, 28, 37, runs the tester with m=3
// and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim < 13; n_dim += 4) {
    for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27068 
// For k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..3, runs the tester with
// cm_stride(7) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27089 
// For k = 1, 10, 19, 28, 37, runs the m=3, n=4 configuration with ks(3)
// and a_offset(127).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_dim)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27106 
// For k = 1, 10, 19, 28, 37 and zero_index in 0..2, runs the m=3, n=4
// configuration with ks(3) and a_offset(127).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 41; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27126 
// One run at m=3, n=4, k=8 with qmin(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
27140 
// qmax: full 3x4 tile, k = 8, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
27154 
// strided_cm: full 3x4 tile, k = 8, with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
27168 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
27169 
27170 
27171 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: full 4x4 tile with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
27184 
// strided_cn: full 4x4 tile, k = 8, with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
27198 
// k_eq_8_subtile: k = 8 for every n in 1..4 and m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27216 
// k_eq_8_subtile_m: k = 8, n = 4, with m swept over 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27232 
// k_eq_8_subtile_n: k = 8, m = 4, with n swept over 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27248 
// k_lt_8: full 4x4 tile with k swept over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 7; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27263 
// k_lt_8_subtile: k in 1..7 crossed with n in 1..4 and m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27283 
// k_gt_8: full 4x4 tile with k swept over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc <= 15; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27298 
// k_gt_8_subtile: k in 9..15 crossed with n in 1..4 and m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27318 
// k_div_8: full 4x4 tile with k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27333 
// k_div_8_subtile: k = 16, 24, ..., 80 crossed with n in 1..4 and m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27353 
// n_gt_4: m = 4 with n in 5..7, each for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27370 
// n_gt_4_strided_cn: n in 5..7 and k = 1, 10, ..., 37 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27388 
// n_gt_4_subtile: n in 5..7, k = 1, 10, ..., 37 and m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27408 
// n_div_4: m = 4 with n = 8 and 12, each for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27425 
// n_div_4_strided_cn: n = 8 and 12, k = 1, 10, ..., 37, with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27443 
// n_div_4_subtile: n = 8 and 12, k = 1, 10, ..., 37 and m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27463 
// small_kernel: full 4x4 tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27479 
// small_kernel_subtile: ks(3) with k = 1, 10, ..., 37, n in 1..4 and m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27500 
// n_gt_4_small_kernel: ks(3) with n in 5..7 and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27518 
// n_div_4_small_kernel: ks(3) with n = 8 and 12 and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27536 
// strided_cm_subtile: cm_stride(7) with k = 1, 10, ..., 37, n in 1..4 and m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27557 
// a_offset: full 4x4 tile with ks(3) and a_offset(163), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27574 
// zero: same setup as a_offset, additionally trying zero_index values 0..3 for every k.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi <= 3; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27594 
// qmin: full 4x4 tile, k = 8, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
27608 
// qmax: full 4x4 tile, k = 8, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
27622 
// strided_cm: full 4x4 tile, k = 8, with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
27636 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
27637 
27638 
27639 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: full 1x4 tile with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
27652 
// strided_cn: full 1x4 tile, k = 8, with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
27666 
// k_eq_8_subtile: k = 8 for every n in 1..4 and m = 1 (the m loop has a single pass), one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
27684 
// k_eq_8_subtile_m: k = 8, n = 4, m looped over the single value 1, one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
27700 
// k_eq_8_subtile_n: k = 8, m = 1, with n swept over 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
27716 
// k_lt_8: full 1x4 tile with k swept over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 7; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
27731 
// k_lt_8_subtile: k in 1..7 crossed with n in 1..4 and m = 1, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27751 
// k_gt_8: full 1x4 tile with k swept over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc <= 15; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
27766 
// k_gt_8_subtile: k in 9..15 crossed with n in 1..4 and m = 1, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27786 
// k_div_8: full 1x4 tile with k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
27801 
// k_div_8_subtile: k = 16, 24, ..., 80 crossed with n in 1..4 and m = 1, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27821 
// n_gt_4: m = 1 with n in 5..7, each for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
27838 
// n_gt_4_strided_cn: n in 5..7 and k = 1, 10, ..., 37 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
27856 
// n_gt_4_subtile: n in 5..7, k = 1, 10, ..., 37 and m = 1, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27876 
// n_div_4: m = 1 with n = 8 and 12, each for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
27893 
// n_div_4_strided_cn: n = 8 and 12, k = 1, 10, ..., 37, with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
27911 
// n_div_4_subtile: n = 8 and 12, k = 1, 10, ..., 37 and m = 1, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27931 
// small_kernel: full 1x4 tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
27947 
// small_kernel_subtile: ks(3) with k = 1, 10, ..., 37, n in 1..4 and m = 1, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27968 
// n_gt_4_small_kernel: ks(3) with n in 5..7 and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
27986 
// n_div_4_small_kernel: ks(3) with n = 8 and 12 and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
28004 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // cm_stride(7) applied to every (m, n) with m <= 1 and n <= 4, for
  // k = 1, 10, 19, 28, 37; one iteration per configuration.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28025 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  // m = 1, n = 4 with ks(3) and a_offset(43), for k = 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28042 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  // Same setup as the a_offset case, additionally passing each zero_index
  // below 1 (i.e. only 0), for k = 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(4).k(depth);
      tester.ks(3).a_offset(43).zero_index(zero_row);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28062 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Single run at m = 1, n = 4, k = 8 with qmin(128) set on the tester.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
28076 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Single run at m = 1, n = 4, k = 8 with qmax(128) set on the tester.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
28090 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  // Single run at m = 1, n = 4, k = 8 with cm_stride(7).
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
28104 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
28105 
28106 
28107 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  // Baseline run: m = mr = 1, n = nr = 4, k = 8.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
28120 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // Single run at m = 1, n = 4, k = 8 with cn_stride(7).
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
28134 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k = 8 for every (m, n) with m <= 1 and n <= 4; one iteration each.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28152 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  // k = 8, n = 4, varying only m over 1..1; one iteration each.
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(rows).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28168 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  // k = 8, m = 1, varying only n over 1..4; one iteration each.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28184 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  // m = 1, n = 4 for every k in 1..7.
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28199 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // Every k in 1..7 crossed with every (m, n), m <= 1 and n <= 4; one
  // iteration per configuration.
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28219 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  // m = 1, n = 4 for every k in 9..15.
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28234 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // Every k in 9..15 crossed with every (m, n), m <= 1 and n <= 4; one
  // iteration per configuration.
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28254 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  // m = 1, n = 4 for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28269 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k = 16, 24, ..., 80 crossed with every (m, n), m <= 1 and n <= 4; one
  // iteration per configuration.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28289 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  // n = 5, 6, 7 (above nr = 4) with m = 1, for k = 1, 10, 19, 28, 37.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28306 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // n = 5, 6, 7 with cn_stride(7), for k = 1, 10, 19, 28, 37.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28324 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37 and every m <= 1; one
  // iteration per configuration.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28344 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 (multiples of nr = 4) with m = 1, for k = 1, 10, 19, 28, 37.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28361 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 with cn_stride(7), for k = 1, 10, 19, 28, 37.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28379 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37 and every m <= 1; one
  // iteration per configuration.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28399 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // m = 1, n = 4 with ks(3), for k = 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28415 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // ks(3) runs for k = 1, 10, 19, 28, 37 over every (m, n) with m <= 1 and
  // n <= 4; one iteration per configuration.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28436 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // n = 5, 6, 7 (above nr = 4) combined with ks(3), for k = 1, 10, 19, 28, 37.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28454 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 (multiples of nr = 4) combined with ks(3), for
  // k = 1, 10, 19, 28, 37.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28472 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // cm_stride(7) applied to every (m, n) with m <= 1 and n <= 4, for
  // k = 1, 10, 19, 28, 37; one iteration per configuration.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28493 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  // m = 1, n = 4 with ks(3) and a_offset(43), for k = 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(depth);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28510 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  // Same setup as the a_offset case, additionally passing each zero_index
  // below 1 (i.e. only 0), for k = 1, 10, 19, 28, 37.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(4).k(depth);
      tester.ks(3).a_offset(43).zero_index(zero_row);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28530 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Single run at m = 1, n = 4, k = 8 with qmin(128) set on the tester.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
28544 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Single run at m = 1, n = 4, k = 8 with qmax(128) set on the tester.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
28558 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  // Single run at m = 1, n = 4, k = 8 with cm_stride(7).
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
28572 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
28573 
28574 
28575 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // Baseline run: m = mr = 2, n = nr = 4, k = 8.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
28588 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Single run at m = 2, n = 4, k = 8 with cn_stride(7).
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
28602 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8 for every (m, n) with m <= 2 and n <= 4; one iteration each.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28620 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8, n = 4, varying only m over 1..2; one iteration each.
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(rows).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28636 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8, m = 2, varying only n over 1..4; one iteration each.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28652 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // m = 2, n = 4 for every k in 1..7.
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28667 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every k in 1..7 crossed with every (m, n), m <= 2 and n <= 4; one
  // iteration per configuration.
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28687 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // m = 2, n = 4 for every k in 9..15.
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28702 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every k in 9..15 crossed with every (m, n), m <= 2 and n <= 4; one
  // iteration per configuration.
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28722 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // m = 2, n = 4 for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28737 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 16, 24, ..., 80 crossed with every (m, n), m <= 2 and n <= 4; one
  // iteration per configuration.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28757 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // n = 5, 6, 7 (above nr = 4) with m = 2, for k = 1, 10, 19, 28, 37.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28774 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n = 5, 6, 7 with m = 2 and cn_stride(7), for k = 1, 10, 19, 28, 37.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28792 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37 and every m <= 2; one
  // iteration per configuration.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28812 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12 (multiples of nr = 4) with m = 2, for k = 1, 10, 19, 28, 37.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28829 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12 with m = 2 and cn_stride(7), for k = 1, 10, 19, 28, 37.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28847 
// Sweeps n in {8, 12}, k in {1, 10, 19, 28, 37} and m in [1, 2] on the
// 2x4c2s4 SSE2 ld128 kernel, running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28867 
// Runs the full 2x4 tile with ks=3 for k in {1, 10, 19, 28, 37} on the
// 2x4c2s4 SSE2 ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(depth).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28883 
// Sweeps k in {1, 10, 19, 28, 37}, n in [1, 4] and m in [1, 2] with ks=3 on
// the 2x4c2s4 SSE2 ld128 kernel, running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 2; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28904 
// Sweeps n in [5, 8) and k in {1, 10, 19, 28, 37} with m=2 and ks=3 on the
// 2x4c2s4 SSE2 ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; cols++) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28922 
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} with m=2 and ks=3 on the
// 2x4c2s4 SSE2 ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28940 
// Sweeps k in {1, 10, 19, 28, 37}, n in [1, 4] and m in [1, 2] with
// cm_stride=7 on the 2x4c2s4 SSE2 ld128 kernel, one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 2; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28961 
// Runs the full 2x4 tile with ks=3 and a_offset=83 for k in
// {1, 10, 19, 28, 37} on the 2x4c2s4 SSE2 ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(depth).ks(3).a_offset(83);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28978 
// Runs the full 2x4 tile with ks=3 and a_offset=83 for k in
// {1, 10, 19, 28, 37}, passing each zero_index in [0, 2) in turn.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 2; zero_row++) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(4).k(depth).ks(3).a_offset(83).zero_index(zero_row);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28998 
// Runs the full 2x4 tile at k=8 with qmin set to 128 on the 2x4c2s4 SSE2
// ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29012 
// Runs the full 2x4 tile at k=8 with qmax set to 128 on the 2x4c2s4 SSE2
// ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29026 
// Runs the full 2x4 tile at k=8 with cm_stride set to 7 on the 2x4c2s4 SSE2
// ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29040 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
29041 
29042 
29043 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the full 3x4 tile at k=8 on the 3x4c2s4 SSE2 ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29056 
// Runs the full 3x4 tile at k=8 with cn_stride set to 7 on the 3x4c2s4 SSE2
// ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29070 
// Sweeps n in [1, 4] and m in [1, 3] at k=8 on the 3x4c2s4 SSE2 ld128
// kernel, running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; cols++) {
    for (uint32_t rows = 1; rows <= 3; rows++) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(rows).n(cols).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29088 
// Sweeps m in [1, 3] with n=4 and k=8 on the 3x4c2s4 SSE2 ld128 kernel,
// running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t rows = 1; rows <= 3; rows++) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(rows).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29104 
// Sweeps n in [1, 4] with m=3 and k=8 on the 3x4c2s4 SSE2 ld128 kernel,
// running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; cols++) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(cols).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29120 
// Runs the full 3x4 tile for every k in [1, 8) on the 3x4c2s4 SSE2 ld128
// kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; depth++) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29135 
// Sweeps k in [1, 8), n in [1, 4] and m in [1, 3] on the 3x4c2s4 SSE2 ld128
// kernel, running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; depth++) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 3; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29155 
// Runs the full 3x4 tile for every k in [9, 16) on the 3x4c2s4 SSE2 ld128
// kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; depth++) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29170 
// Sweeps k in [9, 16), n in [1, 4] and m in [1, 3] on the 3x4c2s4 SSE2 ld128
// kernel, running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; depth++) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 3; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29190 
// Runs the full 3x4 tile for k = 16, 24, ..., 80 on the 3x4c2s4 SSE2 ld128
// kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29205 
// Sweeps k = 16, 24, ..., 80, n in [1, 4] and m in [1, 3] on the 3x4c2s4
// SSE2 ld128 kernel, running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 3; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29225 
// Sweeps n in [5, 8) and k in {1, 10, 19, 28, 37} with m=3 on the 3x4c2s4
// SSE2 ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; cols++) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29242 
// Sweeps n in [5, 8) and k in {1, 10, 19, 28, 37} with m=3 and cn_stride=7
// on the 3x4c2s4 SSE2 ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; cols++) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(cols).k(depth).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29260 
// Sweeps n in [5, 8), k in {1, 10, 19, 28, 37} and m in [1, 3] on the
// 3x4c2s4 SSE2 ld128 kernel, running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; cols++) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29280 
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} with m=3 on the 3x4c2s4
// SSE2 ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29297 
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} with m=3 and cn_stride=7
// on the 3x4c2s4 SSE2 ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(cols).k(depth).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29315 
// Sweeps n in {8, 12}, k in {1, 10, 19, 28, 37} and m in [1, 3] on the
// 3x4c2s4 SSE2 ld128 kernel, running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29335 
// Runs the full 3x4 tile with ks=3 for k in {1, 10, 19, 28, 37} on the
// 3x4c2s4 SSE2 ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(depth).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29351 
// Sweeps k in {1, 10, 19, 28, 37}, n in [1, 4] and m in [1, 3] with ks=3 on
// the 3x4c2s4 SSE2 ld128 kernel, running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 3; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29372 
// Sweeps n in [5, 8) and k in {1, 10, 19, 28, 37} with m=3 and ks=3 on the
// 3x4c2s4 SSE2 ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; cols++) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29390 
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} with m=3 and ks=3 on the
// 3x4c2s4 SSE2 ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29408 
// Sweeps k in {1, 10, 19, 28, 37}, n in [1, 4] and m in [1, 3] with
// cm_stride=7 on the 3x4c2s4 SSE2 ld128 kernel, one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 3; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29429 
// Runs the full 3x4 tile with ks=3 and a_offset=127 for k in
// {1, 10, 19, 28, 37} on the 3x4c2s4 SSE2 ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(depth).ks(3).a_offset(127);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29446 
// Runs the full 3x4 tile with ks=3 and a_offset=127 for k in
// {1, 10, 19, 28, 37}, passing each zero_index in [0, 3) in turn.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 3; zero_row++) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(4).k(depth).ks(3).a_offset(127).zero_index(zero_row);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29466 
// Runs the full 3x4 tile at k=8 with qmin set to 128 on the 3x4c2s4 SSE2
// ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29480 
// Runs the full 3x4 tile at k=8 with qmax set to 128 on the 3x4c2s4 SSE2
// ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29494 
// Runs the full 3x4 tile at k=8 with cm_stride set to 7 on the 3x4c2s4 SSE2
// ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29508 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
29509 
29510 
29511 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the full 3x4 tile at k=8 on the 3x4c2s4 SSE4.1 ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29524 
// Runs the full 3x4 tile at k=8 with cn_stride set to 7 on the 3x4c2s4
// SSE4.1 ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29538 
// Sweeps n in [1, 4] and m in [1, 3] at k=8 on the 3x4c2s4 SSE4.1 ld128
// kernel, running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; cols++) {
    for (uint32_t rows = 1; rows <= 3; rows++) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(rows).n(cols).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29556 
// Sweeps m in [1, 3] with n=4 and k=8 on the 3x4c2s4 SSE4.1 ld128 kernel,
// running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t rows = 1; rows <= 3; rows++) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(rows).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29572 
// Sweeps n in [1, 4] with m=3 and k=8 on the 3x4c2s4 SSE4.1 ld128 kernel,
// running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; cols++) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(cols).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29588 
// Runs the full 3x4 tile for every k in [1, 8) on the 3x4c2s4 SSE4.1 ld128
// kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; depth++) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29603 
// Sweeps k in [1, 8), n in [1, 4] and m in [1, 3] on the 3x4c2s4 SSE4.1
// ld128 kernel, running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; depth++) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 3; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29623 
// Runs the full 3x4 tile for every k in [9, 16) on the 3x4c2s4 SSE4.1 ld128
// kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; depth++) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29638 
// Sweeps k in [9, 16), n in [1, 4] and m in [1, 3] on the 3x4c2s4 SSE4.1
// ld128 kernel, running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; depth++) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 3; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29658 
// Runs the full 3x4 tile for k = 16, 24, ..., 80 on the 3x4c2s4 SSE4.1 ld128
// kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29673 
// Sweeps k = 16, 24, ..., 80, n in [1, 4] and m in [1, 3] on the 3x4c2s4
// SSE4.1 ld128 kernel, running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 3; rows++) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29693 
// Sweeps n in [5, 8) and k in {1, 10, 19, 28, 37} with m=3 on the 3x4c2s4
// SSE4.1 ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; cols++) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29710 
// Same sweep as n_gt_4 (n = 5, 6, 7; k = 1, 10, 19, 28, 37; m == mr),
// but with cn_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
29728 
// n = 5, 6, 7 combined with every m in 1..3 and k = 1, 10, 19, 28, 37.
// Each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
29748 
// n set to multiples of nr above nr (n = 8 and 12), m == mr,
// with k taking the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
29765 
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, 19, 28, 37; m == mr),
// but with cn_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
29783 
// n = 8 and 12 combined with every m in 1..3 and k = 1, 10, 19, 28, 37.
// Each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
29803 
// Full 3x4 tile with ks(3) set on the tester, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
29819 
// ks(3) combined with every (m, n) sub-tile (1 <= m <= 3, 1 <= n <= 4)
// for k = 1, 10, 19, 28, 37. Each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
29840 
// ks(3) with n = 5, 6, 7 (above nr), m == mr, and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
29858 
// ks(3) with n = 8 and 12 (multiples of nr), m == mr, and
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
29876 
// cm_stride(7) combined with every (m, n) sub-tile (1 <= m <= 3,
// 1 <= n <= 4) for k = 1, 10, 19, 28, 37, with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
29897 
// Full 3x4 tile with ks(3) and a_offset(127) set on the tester,
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
29914 
// Full 3x4 tile with ks(3) and a_offset(127), repeated with zero_index(mz)
// for each mz in 0..2 (one per row of the tile), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
29934 
// Single full 3x4 tile at k = 8 with qmin(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
29948 
// Single full 3x4 tile at k = 8 with qmax(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
29962 
// Single full 3x4 tile at k = 8 with cm_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(3)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
29976 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
29977 
29978 
29979 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full 1x4 tile (m == mr, n == nr) at k = 8.
// Gated by TEST_REQUIRES_X86_XOP.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
29992 
// Single full 1x4 tile at k = 8 with cn_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
30006 
// Every (m, n) sub-tile at k = 8 with iterations(1). Because mr is 1, the
// m loop covers only m = 1; n ranges over 1..4.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
30024 
// Varies m only (n fixed at nr) at k = 8 with iterations(1).
// Because mr is 1, the loop covers only m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
30040 
// Varies n only (1..4, m fixed at mr) at k = 8 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
30056 
// Full 1x4 tile with every k below 8 (k = 1..7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
30071 
// k = 1..7 combined with every n in 1..4 (m loop covers only m = 1 since
// mr is 1). Each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30091 
// Full 1x4 tile with every k strictly between 8 and 16 (k = 9..15).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
30106 
// k = 9..15 combined with every n in 1..4 (m loop covers only m = 1 since
// mr is 1). Each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30126 
// Full 1x4 tile with k swept over the multiples of 8 from 16 to 80 inclusive.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
30141 
// k = 16, 24, ..., 80 combined with every n in 1..4 (m loop covers only
// m = 1 since mr is 1). Each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30161 
// n larger than nr but below 2 * nr (n = 5, 6, 7), m == mr,
// with k taking the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
30178 
// Same sweep as n_gt_4 (n = 5, 6, 7; k = 1, 10, 19, 28, 37; m == mr),
// but with cn_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
30196 
// n = 5, 6, 7 and k = 1, 10, 19, 28, 37 with iterations(1); the m loop
// covers only m = 1 since mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30216 
// n set to multiples of nr above nr (n = 8 and 12), m == mr,
// with k taking the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
30233 
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, 19, 28, 37; m == mr),
// but with cn_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
30251 
// n = 8 and 12 and k = 1, 10, 19, 28, 37 with iterations(1); the m loop
// covers only m = 1 since mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30271 
// Full 1x4 tile with ks(3) set on the tester, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
30287 
// ks(3) combined with every n in 1..4 for k = 1, 10, 19, 28, 37, with
// iterations(1); the m loop covers only m = 1 since mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30308 
// ks(3) with n = 5, 6, 7 (above nr), m == mr, and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
30326 
// ks(3) with n = 8 and 12 (multiples of nr), m == mr, and
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
30344 
// cm_stride(7) combined with every n in 1..4 for k = 1, 10, 19, 28, 37, with
// iterations(1); the m loop covers only m = 1 since mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30365 
// Full 1x4 tile with ks(3) and a_offset(43) set on the tester,
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
30382 
// Full 1x4 tile with ks(3), a_offset(43) and zero_index(mz), for
// k = 1, 10, 19, 28, 37. Because mr is 1, the mz loop covers only mz = 0.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
30402 
// Single full 1x4 tile at k = 8 with qmin(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
30416 
// Single full 1x4 tile at k = 8 with qmax(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
30430 
// Single full 1x4 tile at k = 8 with cm_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
30444 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
30445 
30446 
30447 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full 2x4 tile (m == mr, n == nr) at k = 8.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
30460 
// Single full 2x4 tile at k = 8 with cn_stride(7) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
30474 
// Every (m, n) sub-tile with 1 <= m <= 2 and 1 <= n <= 4 at k = 8.
// Each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
30492 
// Varies m only (1..2, n fixed at nr) at k = 8 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
30508 
// Varies n only (1..4, m fixed at mr) at k = 8 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
30524 
// Full 2x4 tile with every k below 8 (k = 1..7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
30539 
// k = 1..7 combined with every (m, n) sub-tile (1 <= m <= 2, 1 <= n <= 4).
// Each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
30559 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  // Full 2x4 tile with k covering every value from 9 through 15.
  for (size_t kc = 9; kc <= 15; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30574 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // Every (k, n, m) with k in 9..15, n in 1..4, m in 1..2; one iteration each.
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30594 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  // Full 2x4 tile with k stepping through 16, 24, ..., 80.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30609 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k steps 16, 24, ..., 80; for each, n in 1..4 and m in 1..2, one iteration.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30629 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  // n in 5..7 (outer) crossed with k = 1, 10, 19, 28, 37 (inner); m fixed at 2.
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cols).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30646 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // Same n (5..7) and k (1, 10, ..., 37) sweep as n_gt_4, with cn_stride(7).
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cols).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30664 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  // n in 5..7, k = 1, 10, ..., 37, m in 1..2; one iteration per combination.
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30684 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  // n takes 8 and 12 (outer), k takes 1, 10, 19, 28, 37 (inner); m fixed at 2.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cols).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30701 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // Same n (8, 12) and k (1, 10, ..., 37) sweep as n_div_4, with cn_stride(7).
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cols).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30719 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  // n in {8, 12}, k = 1, 10, ..., 37, m in 1..2; one iteration per combination.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30739 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  // Full 2x4 tile with ks(3); k takes 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30755 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  // ks(3) with k = 1, 10, ..., 37, n in 1..4, m in 1..2; one iteration each.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30776 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  // ks(3) with n in 5..7 (outer) and k = 1, 10, ..., 37 (inner); m fixed at 2.
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cols).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30794 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  // ks(3) with n in {8, 12} (outer) and k = 1, 10, ..., 37 (inner); m fixed at 2.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4)
          .m(2).n(cols).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30812 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  // cm_stride(7) with k = 1, 10, ..., 37, n in 1..4, m in 1..2; one iteration each.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30833 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  // Full 2x4 tile with ks(3) and a_offset(83); k takes 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30850 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  // For k = 1, 10, ..., 37, runs once per zero_index value 0 and 1,
  // keeping ks(3) and a_offset(83) on the full 2x4 tile.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(4)
          .m(2).n(4).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_row)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30870 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  // Single run: full 2x4 tile, k = 8, qmin(128).
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30884 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  // Single run: full 2x4 tile, k = 8, qmax(128).
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30898 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  // Single run: full 2x4 tile, k = 8, cm_stride(7).
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30912 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
30913 
30914 
30915 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  // Single run: full 4x4 tile with k = 8.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30928 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // Single run: full 4x4 tile, k = 8, cn_stride(7).
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30942 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k fixed at 8; n in 1..4 (outer) crossed with m in 1..4 (inner), one iteration each.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30960 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  // n stays at 4 and k at 8 while m walks 1..4; one iteration per shape.
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30976 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  // m stays at 4 and k at 8 while n walks 1..4; one iteration per shape.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30992 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 4x4 tile with k covering every value from 1 through 7.
  for (size_t kc = 1; kc <= 7; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
31007 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every (k, n, m) with k in 1..7, n in 1..4, m in 1..4; one iteration each.
  for (size_t kc = 1; kc <= 7; ++kc) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31027 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 4x4 tile with k covering every value from 9 through 15.
  for (size_t kc = 9; kc <= 15; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
31042 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every (k, n, m) with k in 9..15, n in 1..4, m in 1..4; one iteration each.
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31062 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 4x4 tile with k stepping through 16, 24, ..., 80.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
31077 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k steps 16, 24, ..., 80; for each, n in 1..4 and m in 1..4, one iteration.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31097 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  // n in 5..7 (outer) crossed with k = 1, 10, 19, 28, 37 (inner); m fixed at 4.
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31114 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // Same n (5..7) and k (1, 10, ..., 37) sweep as n_gt_4, with cn_stride(7).
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31132 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n in 5..7, k = 1, 10, ..., 37, m in 1..4; one iteration per combination.
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31152 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  // n takes 8 and 12 (outer), k takes 1, 10, 19, 28, 37 (inner); m fixed at 4.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31169 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // Same n (8, 12) and k (1, 10, ..., 37) sweep as n_div_4, with cn_stride(7).
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31187 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n in {8, 12}, k = 1, 10, ..., 37, m in 1..4; one iteration per combination.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31207 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // Full 4x4 tile with ks(3); k takes 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
31223 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  // ks(3) with k = 1, 10, ..., 37, n in 1..4, m in 1..4; one iteration each.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31244 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // ks(3) with n in 5..7 (outer) and k = 1, 10, ..., 37 (inner); m fixed at 4.
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31262 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // ks(3) with n in {8, 12} (outer) and k = 1, 10, ..., 37 (inner); m fixed at 4.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31280 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  // cm_stride(7) with k = 1, 10, ..., 37, n in 1..4, m in 1..4; one iteration each.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31301 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  // Full 4x4 tile with ks(3) and a_offset(163); k takes 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
31318 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  // For k = 1, 10, ..., 37, runs once per zero_index value 0..3,
  // keeping ks(3) and a_offset(163) on the full 4x4 tile.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_row)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31338 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  // Single run: full 4x4 tile, k = 8, qmin(128).
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
31352 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  // Single run: full 4x4 tile, k = 8, qmax(128).
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
31366 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  // Single run: full 4x4 tile, k = 8, cm_stride(7).
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
31380 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
31381 
31382 
31383 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single run: full 1x4 tile with k = 8.
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
31396 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Single run: full 1x4 tile, k = 8, cn_stride(7).
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
31410 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k fixed at 8; n in 1..4 (outer) crossed with m in 1..1 (inner), one iteration each.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31428 
// n=4, k=8; m ranges over 1..1 (a single value), each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31444 
// m=1, k=8; n ranges over 1..4, each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31460 
// m=1, n=4; k ranges over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31475 
// k in 1..7, n in 1..4, m in 1..1; every combination runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31495 
// m=1, n=4; k ranges over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31510 
// k in 9..15, n in 1..4, m in 1..1; every combination runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31530 
// m=1, n=4; k steps through 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31545 
// k in {16, 24, ..., 80}, n in 1..4, m in 1..1; every combination runs
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31565 
// m=1; n in 5..7 crossed with k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31582 
// m=1, cn_stride=7; n in 5..7 crossed with k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31600 
// n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..1; every combination runs
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31620 
// m=1; n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31637 
// m=1, cn_stride=7; n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31655 
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..1; every combination runs
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31675 
// m=1, n=4, ks=3; k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31691 
// ks=3; k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..1; every combination
// runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31712 
// m=1, ks=3; n in 5..7 crossed with k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31730 
// m=1, ks=3; n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31748 
// cm_stride=7; k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..1; every
// combination runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31769 
// m=1, n=4, ks=3, a_offset=43; k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
31786 
// m=1, n=4, ks=3, a_offset=43; k in {1, 10, 19, 28, 37} crossed with
// zero_index in 0..0 (a single value).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_at = 0; zero_at < 1; ++zero_at) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(4).k(k_size)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_at)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31806 
// Single run at m=1, n=4, k=8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
31820 
// Single run at m=1, n=4, k=8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
31834 
// Single run at m=1, n=4, k=8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
31848 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
31849 
31850 
31851 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=1, n=4, k=8 with no optional tester settings.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
31864 
// Single run at m=1, n=4, k=8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
31878 
// k fixed at 8; runs every (n, m) pair for n in 1..4 and m in 1..1,
// each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
31896 
// n=4, k=8; m ranges over 1..1 (a single value), each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
31912 
// m=1, k=8; n ranges over 1..4, each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
31928 
// m=1, n=4; k ranges over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
31943 
// k in 1..7, n in 1..4, m in 1..1; every combination runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31963 
// m=1, n=4; k ranges over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
31978 
// k in 9..15, n in 1..4, m in 1..1; every combination runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
31998 
// m=1, n=4; k steps through 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
32013 
// k in {16, 24, ..., 80}, n in 1..4, m in 1..1; every combination runs
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
32033 
// m=1; n in 5..7 crossed with k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32050 
// m=1, cn_stride=7; n in 5..7 crossed with k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32068 
// n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..1; every combination runs
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
32088 
// m=1; n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32105 
// m=1, cn_stride=7; n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32123 
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..1; every combination runs
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
32143 
// m=1, n=4, ks=3; k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
32159 
// ks=3; k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..1; every combination
// runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
32180 
// m=1, ks=3; n in 5..7 crossed with k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32198 
// m=1, ks=3; n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32216 
// cm_stride=7; k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..1; every
// combination runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
32237 
// m=1, n=4, ks=3, a_offset=43; k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
32254 
// m=1, n=4, ks=3, a_offset=43; k in {1, 10, 19, 28, 37} crossed with
// zero_index in 0..0 (a single value).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_at = 0; zero_at < 1; ++zero_at) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(4).k(k_size)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_at)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
32274 
// Single run at m=1, n=4, k=8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
32288 
// Single run with m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32302 
// Single run with m = 1, n = 4, k = 8 and cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32316 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
32317 
32318 
32319 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 1, n = 4, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32332 
// Single run with m = 1, n = 4, k = 8 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32346 
// k = 8 with every (m, n) pair for n in 1..4 and m in 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32364 
// k = 8, n = 4, with m swept over 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32380 
// k = 8, m = 1, with n swept over 1..4; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32396 
// m = 1, n = 4, with k swept over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32411 
// k in 1..7 crossed with n in 1..4 and m in 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32431 
// m = 1, n = 4, with k swept over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32446 
// k in 9..15 crossed with n in 1..4 and m in 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32466 
// m = 1, n = 4, with k stepping through 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32481 
// k in 16, 24, ..., 80 crossed with n in 1..4 and m in 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32501 
// m = 1 with n in 5..7; for each n, k takes the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32518 
// m = 1 with n in 5..7 and cn_stride = 7; for each n, k takes 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32536 
// n in 5..7 crossed with k in 1, 10, 19, 28, 37 and m in 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32556 
// m = 1 with n in {8, 12}; for each n, k takes the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32573 
// m = 1 with n in {8, 12} and cn_stride = 7; for each n, k takes 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32591 
// n in {8, 12} crossed with k in 1, 10, 19, 28, 37 and m in 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32611 
// m = 1, n = 4 with ks = 3; k takes the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32627 
// ks = 3 with k in 1, 10, 19, 28, 37 crossed with n in 1..4 and m in 1..1;
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32648 
// m = 1, ks = 3 with n in 5..7; for each n, k takes the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32666 
// m = 1, ks = 3 with n in {8, 12}; for each n, k takes the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32684 
// cm_stride = 7 with k in 1, 10, 19, 28, 37 crossed with n in 1..4 and m in 1..1;
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32705 
// m = 1, n = 4 with ks = 3 and a_offset = 43; k takes the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3).a_offset(43)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32722 
// Same setup as a_offset (ks = 3, a_offset = 43) plus zero_index, which only
// takes the value 0 here; k takes the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .ks(3).a_offset(43).zero_index(zero_row)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32742 
// Single run with m = 1, n = 4, k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32756 
// Single run with m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32770 
// Single run with m = 1, n = 4, k = 8 and cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32784 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
32785 
32786 
32787 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 1, n = 4, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32800 
// Single run with m = 1, n = 4, k = 8 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32814 
// k = 8 with every (m, n) pair for n in 1..4 and m in 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32832 
// k = 8, n = 4, with m swept over 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32848 
// k = 8, m = 1, with n swept over 1..4; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32864 
// m = 1, n = 4, with k swept over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32879 
// k in 1..7 crossed with n in 1..4 and m in 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32899 
// m = 1, n = 4, with k swept over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32914 
// k in 9..15 crossed with n in 1..4 and m in 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32934 
// m = 1, n = 4, with k stepping through 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32949 
// k in 16, 24, ..., 80 crossed with n in 1..4 and m in 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32969 
// m = 1 with n in 5..7; for each n, k takes the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32986 
// m = 1 with n in 5..7 and cn_stride = 7; for each n, k takes 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33004 
// n in 5..7 crossed with k in 1, 10, 19, 28, 37 and m in 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33024 
// m = 1 with n in {8, 12}; for each n, k takes the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33041 
// m = 1 with n in {8, 12} and cn_stride = 7; for each n, k takes 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33059 
// n in {8, 12} crossed with k in 1, 10, 19, 28, 37 and m in 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33079 
// m = 1, n = 4 with ks = 3; k takes the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33095 
// ks = 3 with k in 1, 10, 19, 28, 37 crossed with n in 1..4 and m in 1..1;
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33116 
// m = 1, ks = 3 with n in 5..7; for each n, k takes the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33134 
// m = 1, ks = 3 with n in {8, 12}; for each n, k takes the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33152 
// cm_stride(7) case over every (m, n) with m = 1 and n in 1..4, for k in {1, 10, 19, 28, 37};
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33173 
// a_offset(43) combined with ks(3) on a 1x4 tile, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33190 
// Same setup as the a_offset case, additionally setting zero_index to each value in [0, 1),
// for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33210 
// 1x4 tile, k = 8, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
33224 
// 1x4 tile, k = 8, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
33238 
// 1x4 tile, k = 8, with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
33252 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
33253 
33254 
33255 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: m and n equal to mr = 2 and nr = 4, with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
33268 
// 2x4 tile, k = 8, with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
33282 
// k = 8 over every (m, n) with m in 1..2 and n in 1..4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33300 
// k = 8, n = 4, with m varied over 1..2; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33316 
// k = 8, m = 2, with n varied over 1..4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33332 
// 2x4 tile with k in 1..7 (below 8).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33347 
// k in 1..7 over every (m, n) with m in 1..2 and n in 1..4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33367 
// 2x4 tile with k in 9..15 (above 8, below 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33382 
// k in 9..15 over every (m, n) with m in 1..2 and n in 1..4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33402 
// 2x4 tile with k stepping through multiples of 8 from 16 to 80 inclusive.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33417 
// k in {16, 24, ..., 80} over every (m, n) with m in 1..2 and n in 1..4;
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33437 
// n in 5..7 (above nr = 4) with k in {1, 10, 19, 28, 37}; m fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33454 
// n in 5..7 with k in {1, 10, 19, 28, 37} and cn_stride set to 7; m fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33472 
// n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..2; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33492 
// n in {8, 12} (multiples of nr = 4) with k in {1, 10, 19, 28, 37}; m fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33509 
// n in {8, 12} with k in {1, 10, 19, 28, 37} and cn_stride set to 7; m fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33527 
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..2; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33547 
// 2x4 tile with ks set to 3, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33563 
// ks(3) case over every (m, n) with m in 1..2 and n in 1..4, for k in {1, 10, 19, 28, 37};
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33584 
// ks(3) case with n in 5..7 (above nr = 4) and k in {1, 10, 19, 28, 37}; m fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33602 
// ks(3) case with n in {8, 12} (multiples of nr = 4) and k in {1, 10, 19, 28, 37}; m fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33620 
// cm_stride(7) case over every (m, n) with m in 1..2 and n in 1..4, for k in {1, 10, 19, 28, 37};
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33641 
// a_offset(83) combined with ks(3) on a 2x4 tile, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33658 
// Same setup as the a_offset case, additionally setting zero_index to each value in [0, 2),
// for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33678 
// 2x4 tile, k = 8, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
33692 
// 2x4 tile, k = 8, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
33706 
// 2x4 tile, k = 8, with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
33720 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
33721 
33722 
33723 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline case: m and n equal to mr = 3 and nr = 4, with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
33736 
// 3x4 tile, k = 8, with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
33750 
// k = 8 over every (m, n) with m in 1..3 and n in 1..4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33768 
// k = 8, n = 4, with m varied over 1..3; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33784 
// k = 8, m = 3, with n varied over 1..4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33800 
// 3x4 tile with k in 1..7 (below 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33815 
// k in 1..7 over every (m, n) with m in 1..3 and n in 1..4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33835 
// 3x4 tile with k in 9..15 (above 8, below 16).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33850 
// k in 9..15 over every (m, n) with m in 1..3 and n in 1..4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33870 
// 3x4 tile with k stepping through multiples of 8 from 16 to 80 inclusive.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
33885 
// k in {16, 24, ..., 80} over every (m, n) with m in 1..3 and n in 1..4;
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33905 
// n in 5..7 (above nr = 4) with k in {1, 10, 19, 28, 37}; m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33922 
// n in 5..7 with k in {1, 10, 19, 28, 37} and cn_stride set to 7; m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33940 
// n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..3; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33960 
// n in {8, 12} (multiples of nr = 4) with k in {1, 10, 19, 28, 37}; m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33977 
// n in {8, 12} with k in {1, 10, 19, 28, 37} and cn_stride set to 7; m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
33995 
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..3; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34015 
// 3x4 tile with ks set to 3, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
34031 
// ks = 3 with every m in 1..3 and n in 1..4, for k in {1, 10, 19, 28, 37}; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34052 
// ks = 3 with n in 5..7 and k in {1, 10, 19, 28, 37}; m stays at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34070 
// ks = 3 with n in {8, 12} and k in {1, 10, 19, 28, 37}; m stays at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34088 
// cm_stride = 7 with every m in 1..3 and n in 1..4, for k in {1, 10, 19, 28, 37}; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34109 
// Full 3x4 tile with ks = 3 and a_offset = 127, sweeping k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(depth).ks(3).a_offset(127);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
34126 
// ks = 3, a_offset = 127, and zero_index set to each of 0..2 in turn, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(4).k(depth).ks(3).a_offset(127).zero_index(zero_row);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34146 
// Full 3x4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
34160 
// Full 3x4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
34174 
// Full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
34188 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
34189 
34190 
34191 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 1x4 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
34204 
// Full 1x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
34218 
// k = 8 with every n in 1..4 and m in 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(rows).n(cols).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34236 
// k = 8, n = 4, with m swept over 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(rows).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34252 
// k = 8, m = 1, with n swept over 1..4; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(cols).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34268 
// Full 1x4 tile with k swept over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34283 
// k swept over 1..7 with every n in 1..4 and m in 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34303 
// Full 1x4 tile with k swept over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34318 
// k swept over 9..15 with every n in 1..4 and m in 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34338 
// Full 1x4 tile with k swept over 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34353 
// k swept over 16, 24, ..., 80 with every n in 1..4 and m in 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34373 
// n swept over 5..7 and k over {1, 10, 19, 28, 37}; m stays at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34390 
// n swept over 5..7 and k over {1, 10, 19, 28, 37} with cn_stride = 7; m stays at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(cols).k(depth).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34408 
// n swept over 5..7, k over {1, 10, 19, 28, 37} and m over 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34428 
// n swept over {8, 12} and k over {1, 10, 19, 28, 37}; m stays at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34445 
// n swept over {8, 12} and k over {1, 10, 19, 28, 37} with cn_stride = 7; m stays at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(cols).k(depth).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34463 
// n swept over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..1; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34483 
// Full 1x4 tile with ks = 3, sweeping k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(depth).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34499 
// ks = 3 with every n in 1..4 and m in 1..1, for k in {1, 10, 19, 28, 37}; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34520 
// ks = 3 with n in 5..7 and k in {1, 10, 19, 28, 37}; m stays at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34538 
// ks = 3 with n in {8, 12} and k in {1, 10, 19, 28, 37}; m stays at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(cols).k(depth).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34556 
// cm_stride = 7 with every n in 1..4 and m in 1..1, for k in {1, 10, 19, 28, 37}; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34577 
// Full 1x4 tile with ks = 3 and a_offset = 43, sweeping k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(8).sr(1);
    tester.m(1).n(4).k(depth).ks(3).a_offset(43);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34594 
// ks = 3, a_offset = 43, and zero_index swept over 0..0, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(8).sr(1);
      tester.m(1).n(4).k(depth).ks(3).a_offset(43).zero_index(zero_row);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34614 
// Full 1x4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
34628 
// Full 1x4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
34642 
// Full 1x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(8).sr(1);
  tester.m(1).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
34656 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
34657 
34658 
34659 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 2x4 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
34672 
// Full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
34686 
// k = 8 with every n in 1..4 and m in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(rows).n(cols).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34704 
// k = 8, n = 4, with m swept over 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(rows).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
34720 
// k = 8, m = 2, with n swept over 1..4; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(cols).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
34736 
// Full 2x4 tile with k swept over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
34751 
// k swept over 1..7 with every n in 1..4 and m in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34771 
// Full 2x4 tile with k swept over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
34786 
// k swept over 9..15 with every n in 1..4 and m in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34806 
// Full 2x4 tile with k swept over 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
34821 
// k swept over 16, 24, ..., 80 with every n in 1..4 and m in 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34841 
// n swept over 5..7 and k over {1, 10, 19, 28, 37}; m stays at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34858 
// n swept over 5..7 and k over {1, 10, 19, 28, 37} with cn_stride = 7; m stays at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(cols).k(depth).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34876 
// n swept over 5..7, k over {1, 10, 19, 28, 37} and m over 1..2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34896 
// m=2 with n in {8, 12} (multiples of nr=4) and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34913 
// m=2 with n in {8, 12}, k in {1, 10, 19, 28, 37}, and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34931 
// n in {8, 12} x k in {1, 10, 19, 28, 37} x m in {1, 2}; iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34951 
// Full 2x4 tile with ks set to 3, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 41; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
      .m(2).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
34967 
// ks=3 with k in {1, 10, 19, 28, 37} x n in 1..4 x m in {1, 2}; iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34988 
// ks=3 and m=2 with n in {5, 6, 7} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35006 
// ks=3 and m=2 with n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35024 
// cm_stride=7 with k in {1, 10, 19, 28, 37} x n in 1..4 x m in {1, 2}; iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35045 
// Full 2x4 tile with ks=3 and a_offset=83, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 41; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
      .m(2).n(4).k(depth)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35062 
// Full 2x4 tile with ks=3 and a_offset=83; for each k in {1, 10, 19, 28, 37}, zero_index is 0 then 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
        .m(2).n(4).k(depth)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35082 
// Single full 2x4 tile at k=8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35096 
// Single full 2x4 tile at k=8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35110 
// Single full 2x4 tile at k=8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35124 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
35125 
35126 
35127 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full 2x4 tile with k equal to kr=8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
    .m(2).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35140 
// Single full 2x4 tile at k=8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35154 
// k=8 with every n in 1..4 and m in {1, 2}; iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    for (uint32_t rows = 1; rows < 3; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35172 
// k=8 and n=4 with m in {1, 2}; iterations(1) for each m.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows < 3; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35188 
// k=8 and m=2 with n in 1..4; iterations(1) for each n.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
      .m(2).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35204 
// Full 2x4 tile for every k in 1..7 (below kr=8).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 7; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
      .m(2).n(4).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35219 
// k in 1..7 x n in 1..4 x m in {1, 2}; iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35239 
// Full 2x4 tile for every k in 9..15 (above kr=8, below 2*kr).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth <= 15; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
      .m(2).n(4).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35254 
// k in 9..15 x n in 1..4 x m in {1, 2}; iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35274 
// Full 2x4 tile for k = 16, 24, ..., 80 (multiples of kr=8).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth < 81; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
      .m(2).n(4).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35289 
// k = 16, 24, ..., 80 x n in 1..4 x m in {1, 2}; iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth < 81; depth += 8) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35309 
// m=2 with n in {5, 6, 7} (above nr=4, below 2*nr) and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35326 
// m=2 with n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35344 
// n in {5, 6, 7} x k in {1, 10, 19, 28, 37} x m in {1, 2}; iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35364 
// m=2 with n in {8, 12} (multiples of nr=4) and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35381 
// m=2 with n in {8, 12}, k in {1, 10, 19, 28, 37}, and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35399 
// n in {8, 12} x k in {1, 10, 19, 28, 37} x m in {1, 2}; iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35419 
// Full 2x4 tile with ks set to 3, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
      .m(2).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35435 
// ks=3 with k in {1, 10, 19, 28, 37} x n in 1..4 x m in {1, 2}; iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35456 
// ks=3 and m=2 with n in {5, 6, 7} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35474 
// ks=3 and m=2 with n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35492 
// cm_stride=7 with k in {1, 10, 19, 28, 37} x n in 1..4 x m in {1, 2}; iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35513 
// Full 2x4 tile with ks=3 and a_offset=83, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
      .m(2).n(4).k(depth)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35530 
// Full 2x4 tile with ks=3 and a_offset=83; for each k in {1, 10, 19, 28, 37}, zero_index is 0 then 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
        .m(2).n(4).k(depth)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35550 
// Single full 2x4 tile at k=8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35564 
// Single full 2x4 tile at k=8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35578 
// Single full 2x4 tile at k=8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(2).nr(4).kr(8).sr(1)  // mirrors the 2x4c8 in the kernel name
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35592 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
35593 
35594 
35595 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full 3x4 tile with k equal to kr=8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(3).nr(4).kr(8).sr(1)  // mirrors the 3x4c8 in the kernel name
    .m(3).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35608 
// Single full 3x4 tile at k=8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester
    .mr(3).nr(4).kr(8).sr(1)  // mirrors the 3x4c8 in the kernel name
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35622 
// k=8 with every n in 1..4 and m in 1..3; iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    for (uint32_t rows = 1; rows < 4; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester
        .mr(3).nr(4).kr(8).sr(1)  // mirrors the 3x4c8 in the kernel name
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35640 
// k=8 and n=4 with m in 1..3; iterations(1) for each m.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows < 4; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(3).nr(4).kr(8).sr(1)  // mirrors the 3x4c8 in the kernel name
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35656 
// k=8 and m=3 with n in 1..4; iterations(1) for each n.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(3).nr(4).kr(8).sr(1)  // mirrors the 3x4c8 in the kernel name
      .m(3).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35672 
// Full 3x4 tile for every k in 1..7 (below kr=8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 7; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(3).nr(4).kr(8).sr(1)  // mirrors the 3x4c8 in the kernel name
      .m(3).n(4).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35687 
// k in 1..7 x n in 1..4 x m in 1..3; iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(3).nr(4).kr(8).sr(1)  // mirrors the 3x4c8 in the kernel name
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35707 
// Full 3x4 tile for every k in 9..15 (above kr=8, below 2*kr).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth <= 15; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(3).nr(4).kr(8).sr(1)  // mirrors the 3x4c8 in the kernel name
      .m(3).n(4).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35722 
// k in 9..15 x n in 1..4 x m in 1..3; iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester
          .mr(3).nr(4).kr(8).sr(1)  // mirrors the 3x4c8 in the kernel name
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35742 
// Full 3x4 tile for k = 16, 24, ..., 80 (multiples of kr=8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth < 81; depth += 8) {
    auto tester = GemmMicrokernelTester();
    tester
      .mr(3).nr(4).kr(8).sr(1)  // mirrors the 3x4c8 in the kernel name
      .m(3).n(4).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35757 
// Sweeps k over multiples of 8 from 16 to 80 for every (m, n) with m in 1..3 and n in 1..4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35777 
// Sweeps n over 5..7 and k over 1, 10, ..., 37 with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35794 
// Same sweep as n_gt_4 (n in 5..7, k in 1, 10, ..., 37) but with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35812 
// Sweeps n over 5..7, k over 1, 10, ..., 37 and m over 1..3; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35832 
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35849 
// Same sweep as n_div_4 (n in {8, 12}, k in 1, 10, ..., 37) but with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35867 
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over 1..3; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35887 
// Sweeps k over 1, 10, ..., 37 with m=3, n=4 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35903 
// ks=3 sweep over k in 1, 10, ..., 37 for every (m, n) with m in 1..3 and n in 1..4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35924 
// ks=3 sweep over n in 5..7 and k in 1, 10, ..., 37 with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35942 
// ks=3 sweep over n in {8, 12} and k in 1, 10, ..., 37 with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35960 
// cm_stride=7 sweep over k in 1, 10, ..., 37 for every (m, n) with m in 1..3 and n in 1..4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35981 
// Sweeps k over 1, 10, ..., 37 with m=3, n=4, ks=3 and a_offset set to 127.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35998 
// Like a_offset (ks=3, a_offset=127), additionally sweeping zero_index over 0..2.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36018 
// Single m=3, n=4, k=8 case with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36032 
// Single m=3, n=4, k=8 case with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36046 
// Single m=3, n=4, k=8 case with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36060 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
36061 
36062 
36063 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single m=3, n=4, k=8 case on the 3x4c8 XOP LD128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36076 
// Single m=3, n=4, k=8 case with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36090 
// k=8 for every (m, n) with m in 1..3 and n in 1..4; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36108 
// k=8, n=4, sweeping m over 1..3; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36124 
// k=8, m=3, sweeping n over 1..4; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(n).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36140 
// Sweeps k over 1..7 with m=3, n=4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36155 
// Sweeps k over 1..7 for every (m, n) with m in 1..3 and n in 1..4; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36175 
// Sweeps k over 9..15 with m=3, n=4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36190 
// Sweeps k over 9..15 for every (m, n) with m in 1..3 and n in 1..4; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36210 
// Sweeps k over multiples of 8 from 16 to 80 with m=3, n=4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36225 
// Sweeps k over multiples of 8 from 16 to 80 for every (m, n) with m in 1..3 and n in 1..4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36245 
// Sweeps n over 5..7 and k over 1, 10, ..., 37 with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36262 
// Same sweep as n_gt_4 (n in 5..7, k in 1, 10, ..., 37) but with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36280 
// Sweeps n over 5..7, k over 1, 10, ..., 37 and m over 1..3; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36300 
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36317 
// Same sweep as n_div_4 (n in {8, 12}, k in 1, 10, ..., 37) but with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36335 
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over 1..3; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36355 
// Sweeps k over 1, 10, ..., 37 with m=3, n=4 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36371 
// ks=3 sweep over k in 1, 10, ..., 37 for every (m, n) with m in 1..3 and n in 1..4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36392 
// ks=3 sweep over n in 5..7 and k in 1, 10, ..., 37 with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36410 
// ks=3 sweep over n in {8, 12} and k in 1, 10, ..., 37 with m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36428 
// cm_stride=7 sweep over k in 1, 10, ..., 37 for every (m, n) with m in 1..3 and n in 1..4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36449 
// Sweeps k over 1, 10, ..., 37 with m=3, n=4, ks=3 and a_offset set to 127.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36466 
// Like a_offset (ks=3, a_offset=127), additionally sweeping zero_index over 0..2.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36486 
// Single m=3, n=4, k=8 case with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36500 
// Single m=3, n=4, k=8 case with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36514 
// Single m=3, n=4, k=8 case with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36528 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
36529 
36530 
36531 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single m=1, n=16, k=8 case on the 1x16c8 AVX512SKX kernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(8).sr(1)
    .m(1).n(16).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
}
36544 
// Single m=1, n=16, k=8 case with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(8).sr(1)
    .m(1).n(16).k(8)
    .cn_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
}
36558 
// k=8 for every n in 1..16; the m loop covers only m=1. One iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(8).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
36576 
// k=8, n=16; the m loop covers only m=1. One iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(m).n(16).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
36592 
// k=8, m=1, sweeping n over 1..16; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(n).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
36608 
// Sweeps k over 1..7 with m=1, n=16.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(8).sr(1)
      .m(1).n(16).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
36623 
// For every k in [1, 7], runs the tester over every (m, n) pair with
// 1 <= n <= 16 and 1 <= m <= 1, setting iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36643 
// Runs the m = 1, n = 16 configuration once for every k in [9, 15], i.e. the
// k values strictly between 8 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
36658 
// For every k in [9, 15], runs the tester over every (m, n) pair with
// 1 <= n <= 16 and 1 <= m <= 1, setting iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36678 
// Runs the m = 1, n = 16 configuration for k = 16, 24, ..., 80 (multiples of
// 8, stepping by 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
36693 
// For k = 16, 24, ..., 80, runs the tester over every (m, n) pair with
// 1 <= n <= 16 and 1 <= m <= 1, setting iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36713 
// Runs m = 1 with every n in [17, 31] (above the nr setting of 16, below 32),
// each combined with k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
36730 
// Same sweep as n_gt_16 (n in [17, 31], k = 1, 10, 19, 28, 37, m = 1) with
// cn_stride(19) additionally set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
36748 
// For every n in [17, 31] and k = 1, 10, 19, 28, 37, runs the tester for each
// m in [1, 1], setting iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36768 
// Runs m = 1 with n = 32 and n = 48 (multiples of 16, stepping by 16), each
// combined with k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
36785 
// Same sweep as n_div_16 (n = 32 and 48, k = 1, 10, 19, 28, 37, m = 1) with
// cn_stride(19) additionally set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
36803 
// For n = 32 and 48 and k = 1, 10, 19, 28, 37, runs the tester for each m in
// [1, 1], setting iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36823 
// Runs the m = 1, n = 16 configuration with ks(3) set, for
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
36839 
// With ks(3) set, and for k = 1, 10, 19, 28, 37, runs the tester over every
// (m, n) pair with 1 <= n <= 16 and 1 <= m <= 1, using iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36860 
// With ks(3) set, runs m = 1 for every n in [17, 31], each combined with
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
36878 
// With ks(3) set, runs m = 1 for n = 32 and n = 48, each combined with
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
36896 
// With cm_stride(19) set, and for k = 1, 10, 19, 28, 37, runs the tester over
// every (m, n) pair with 1 <= n <= 16 and 1 <= m <= 1, using iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36917 
// Runs the m = 1, n = 16 configuration with ks(3) and a_offset(43) set, for
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
36934 
// Same setup as a_offset (ks(3), a_offset(43), m = 1, n = 16,
// k = 1, 10, 19, 28, 37), additionally passing zero_index(mz) for each mz in
// [0, 1). With mr = 1 the only index exercised is 0.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(1)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
36954 
// Single run of the m = 1, n = 16, k = 8 configuration with qmin(128) set on
// the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(8)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
}
36968 
// Single run of the m = 1, n = 16, k = 8 configuration with qmax(128) set on
// the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(8)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
}
36982 
// Single run of the m = 1, n = 16, k = 8 configuration with cm_stride(19) set
// on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(8)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
}
36996 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
36997 
36998 
36999 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 3 and n = 16 (equal to the mr and nr settings) and
// k = 8 (equal to the kr setting).
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(8)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
}
37012 
// Single run of the m = 3, n = 16, k = 8 configuration with cn_stride(19) set
// on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(8)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
}
37026 
// With k fixed at 8, runs the tester over every (m, n) pair with
// 1 <= n <= 16 and 1 <= m <= 3, setting iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
37044 
// Holds n = 16 and k = 8 fixed and runs the tester for every m in [1, 3]
// (each value up to the mr setting), with iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
37060 
// Holds m = 3 and k = 8 fixed and runs the tester for every n in [1, 16]
// (each value up to the nr setting), with iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
37076 
// Runs the m = 3, n = 16 configuration once for every k in [1, 7], i.e. all
// k values strictly below 8 (the kr setting).
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
37091 
// For every k in [1, 7], runs the tester over every (m, n) pair with
// 1 <= n <= 16 and 1 <= m <= 3, setting iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37111 
// Runs the m = 3, n = 16 configuration once for every k in [9, 15], i.e. the
// k values strictly between 8 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
37126 
// For every k in [9, 15], runs the tester over every (m, n) pair with
// 1 <= n <= 16 and 1 <= m <= 3, setting iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37146 
// Runs the m = 3, n = 16 configuration for k = 16, 24, ..., 80 (multiples of
// 8, stepping by 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
37161 
// For k = 16, 24, ..., 80, runs the tester over every (m, n) pair with
// 1 <= n <= 16 and 1 <= m <= 3, setting iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37181 
// Runs m = 3 with every n in [17, 31] (above the nr setting of 16, below 32),
// each combined with k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
37198 
// Same sweep as n_gt_16 (n in [17, 31], k = 1, 10, 19, 28, 37, m = 3) with
// cn_stride(19) additionally set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
37216 
// For every n in [17, 31] and k = 1, 10, 19, 28, 37, runs the tester for each
// m in [1, 3], setting iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37236 
// Runs m = 3 with n = 32 and n = 48 (multiples of 16, stepping by 16), each
// combined with k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
37253 
// Same sweep as n_div_16 (n = 32 and 48, k = 1, 10, 19, 28, 37, m = 3) with
// cn_stride(19) additionally set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
37271 
// For n = 32 and 48 and k = 1, 10, 19, 28, 37, runs the tester for each m in
// [1, 3], setting iterations(1) for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37291 
// Runs the m = 3, n = 16 configuration with ks(3) set, for
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
37307 
// With ks(3) set, and for k = 1, 10, 19, 28, 37, runs the tester over every
// (m, n) pair with 1 <= n <= 16 and 1 <= m <= 3, using iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37328 
// With ks(3) set, runs m = 3 for every n in [17, 31], each combined with
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
37346 
// With ks(3) set, runs m = 3 for n = 32 and n = 48, each combined with
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
37364 
// With cm_stride(19) set, and for k = 1, 10, 19, 28, 37, runs the tester over
// every (m, n) pair with 1 <= n <= 16 and 1 <= m <= 3, using iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37385 
// Runs the m = 3, n = 16 configuration with ks(3) and a_offset(127) set, for
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(8)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
37402 
// Same setup as a_offset (ks(3), a_offset(127), m = 3, n = 16,
// k = 1, 10, 19, 28, 37), additionally passing zero_index(mz) for each mz in
// [0, 3), i.e. one run per index below the mr setting.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(8)
        .sr(1)
        .m(3)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
37422 
// Single run of the m = 3, n = 16, k = 8 configuration with qmin(128) set on
// the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(8)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
}
37436 
// Single run of the m = 3, n = 16, k = 8 configuration with qmax(128) set on
// the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(8)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
}
37450 
// Single run of the m = 3, n = 16, k = 8 configuration with cm_stride(19) set
// on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(8)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
}
37464 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
37465 
37466 
37467 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = 1 and n = 4 (equal to the mr and nr settings) and
// k = 8. This test has no TEST_REQUIRES_* check in its body.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
37479 
// Full 1x4 output tile at k = 8 with cn_stride set to 7, i.e. a value other
// than nr = 4.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
37492 
// Sweeps n over [1, 4] and m over the single value 1 (mr = 1) at k = 8,
// running one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
37509 
// Varies only m at k = 8 with n fixed at 4; because mr = 1 the loop covers the
// single value m = 1. One iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
37524 
// Varies only n over [1, 4] at k = 8 with m fixed at 1, one iteration per
// shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
37539 
// Full 1x4 output tile for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
37553 
// Every k in [1, 7] crossed with n in [1, 4] and m = 1 (the only value allowed
// by mr = 1), one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37572 
// Full 1x4 output tile for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
37586 
// Every k in [9, 15] crossed with n in [1, 4] and m = 1 (the only value
// allowed by mr = 1), one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37605 
// Full 1x4 output tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
37619 
// k = 16, 24, ..., 80 crossed with n in [1, 4] and m = 1 (the only value
// allowed by mr = 1), one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37638 
// n in [5, 7] (more columns than nr = 4) with m = 1, for k = 1, 10, 19, 28,
// 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
37654 
// n in [5, 7] (more columns than nr = 4) with m = 1 and cn_stride set to 7,
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
37671 
// n in [5, 7] crossed with k = 1, 10, 19, 28, 37 and m = 1 (the only value
// allowed by mr = 1), one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37690 
// n = 8 and 12 (multiples of nr = 4) with m = 1, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
37706 
// n = 8 and 12 (multiples of nr = 4) with m = 1 and cn_stride set to 7, for
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
37723 
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37 and m = 1 (the only value
// allowed by mr = 1), one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37742 
// Full 1x4 output tile with ks set to 3, for k = 1, 10, 19, 28, 37.
// NOTE(review): ks is presumably the kernel size of the indirect GEMM --
// confirm in gemm-microkernel-tester.h.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
37757 
// ks set to 3, for k = 1, 10, 19, 28, 37 crossed with n in [1, 4] and m = 1
// (the only value allowed by mr = 1); one iteration per shape.
// NOTE(review): ks is presumably the kernel size of the indirect GEMM --
// confirm in gemm-microkernel-tester.h.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37777 
// n in [5, 7] (more columns than nr = 4) with m = 1 and ks set to 3, for
// k = 1, 10, 19, 28, 37.
// NOTE(review): ks is presumably the kernel size of the indirect GEMM --
// confirm in gemm-microkernel-tester.h.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
37794 
// n = 8 and 12 (multiples of nr = 4) with m = 1 and ks set to 3, for
// k = 1, 10, 19, 28, 37.
// NOTE(review): ks is presumably the kernel size of the indirect GEMM --
// confirm in gemm-microkernel-tester.h.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
37811 
// cm_stride set to 7, for k = 1, 10, 19, 28, 37 crossed with n in [1, 4] and
// m = 1 (the only value allowed by mr = 1); one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37831 
// Full 1x4 output tile with ks set to 3 and a_offset set to 43, for
// k = 1, 10, 19, 28, 37.
// NOTE(review): a_offset is presumably an offset applied to the input
// pointers -- confirm in gemm-microkernel-tester.h.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
37847 
// Full 1x4 output tile with ks = 3 and a_offset = 43, passing each mz in
// [0, 1) -- i.e. only 0, since mr = 1 -- as zero_index, for
// k = 1, 10, 19, 28, 37.
// NOTE(review): zero_index presumably selects the row served by the zero
// buffer -- confirm in gemm-microkernel-tester.h.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
37866 
// Full 1x4 output tile at k = 8 with the tester's qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
37879 
// Full 1x4 output tile at k = 8 with the tester's qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
37892 
// Full 1x4 output tile at k = 8 with cm_stride set to 7, i.e. a value other
// than nr = 4.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
37905 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
37906 
37907 
37908 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Baseline case: full 1x4 output tile (m = mr, n = nr) at k = 8, with no
// other tester options set.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
37920 
// Full 1x4 output tile at k = 8 with cn_stride set to 7, i.e. a value other
// than nr = 4.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
37933 
// Sweeps n over [1, 4] and m over the single value 1 (mr = 1) at k = 8,
// running one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
37950 
// Varies only m at k = 8 with n fixed at 4; because mr = 1 the loop covers the
// single value m = 1. One iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
37965 
// Varies only n over [1, 4] at k = 8 with m fixed at 1, one iteration per
// shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
37980 
// Full 1x4 output tile for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
37994 
// Every k in [1, 7] crossed with n in [1, 4] and m = 1 (the only value allowed
// by mr = 1), one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38013 
// Full 1x4 output tile for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
38027 
// Every k in [9, 15] crossed with n in [1, 4] and m = 1 (the only value
// allowed by mr = 1), one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38046 
// Full 1x4 output tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
38060 
// k = 16, 24, ..., 80 crossed with n in [1, 4] and m = 1 (the only value
// allowed by mr = 1), one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38079 
// n in [5, 7] (more columns than nr = 4) with m = 1, for k = 1, 10, 19, 28,
// 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
38095 
// n in [5, 7] (more columns than nr = 4) with m = 1 and cn_stride set to 7,
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
38112 
// n in [5, 7] crossed with k = 1, 10, 19, 28, 37 and m = 1 (the only value
// allowed by mr = 1), one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38131 
// n = 8 and 12 (multiples of nr = 4) with m = 1, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
38147 
// n = 8 and 12 (multiples of nr = 4) with m = 1 and cn_stride set to 7, for
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
38164 
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37 and m = 1 (the only value
// allowed by mr = 1), one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38183 
// Full 1x4 output tile with ks set to 3, for k = 1, 10, 19, 28, 37.
// NOTE(review): ks is presumably the kernel size of the indirect GEMM --
// confirm in gemm-microkernel-tester.h.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
38198 
// ks set to 3, for k = 1, 10, 19, 28, 37 crossed with n in [1, 4] and m = 1
// (the only value allowed by mr = 1); one iteration per shape.
// NOTE(review): ks is presumably the kernel size of the indirect GEMM --
// confirm in gemm-microkernel-tester.h.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38218 
// n in [5, 7] (more columns than nr = 4) with m = 1 and ks set to 3, for
// k = 1, 10, 19, 28, 37.
// NOTE(review): ks is presumably the kernel size of the indirect GEMM --
// confirm in gemm-microkernel-tester.h.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
38235 
// n = 8 and 12 (multiples of nr = 4) with m = 1 and ks set to 3, for
// k = 1, 10, 19, 28, 37.
// NOTE(review): ks is presumably the kernel size of the indirect GEMM --
// confirm in gemm-microkernel-tester.h.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
38252 
// cm_stride set to 7, for k = 1, 10, 19, 28, 37 crossed with n in [1, 4] and
// m = 1 (the only value allowed by mr = 1); one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38272 
// Full 1x4 output tile with ks set to 3 and a_offset set to 43, for
// k = 1, 10, 19, 28, 37.
// NOTE(review): a_offset is presumably an offset applied to the input
// pointers -- confirm in gemm-microkernel-tester.h.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
38288 
// Same sweep as the a_offset test (k from 1 to 40, step 9; ks = 3; a_offset = 43),
// additionally passing each zero index in [0, 1) to zero_index().
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38307 
// Single run with m = 1, n = 4, k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38320 
// Single run with m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38333 
// Single run with m = 1, n = 4, k = 8 and cm_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38346 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
38347 
38348 
38349 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = 1, n = 4 and k = 8; no stride or offset overrides.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38361 
// Single run with m = 1, n = 4, k = 8 and cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38374 
// With k fixed at 8, runs one iteration for every n from 1 to 4 and m from 1 to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38391 
// With n = 4 and k = 8, runs one iteration for every m from 1 to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38406 
// With m = 1 and k = 8, runs one iteration for every n from 1 to 4.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(n_val).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38421 
// Runs with m = 1, n = 4 for every k from 1 up to (but not including) 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38435 
// For every k in [1, 8), n from 1 to 4 and m from 1 to 1, runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38454 
// Runs with m = 1, n = 4 for every k from 9 up to (but not including) 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38468 
// For every k in [9, 16), n from 1 to 4 and m from 1 to 1, runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38487 
// Runs with m = 1, n = 4 for k from 16 to 80 in steps of 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38501 
// For k from 16 to 80 (step 8), every n from 1 to 4 and m from 1 to 1,
// runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38520 
// Sweeps n over [5, 8) and k from 1 to 40 in steps of 9, with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38536 
// Sweeps n over [5, 8) and k from 1 to 40 in steps of 9, with m = 1 and cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38553 
// For every n in [5, 8), k from 1 to 40 (step 9) and m from 1 to 1,
// runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38572 
// Sweeps n over 8 and 12 and k from 1 to 40 in steps of 9, with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38588 
// Sweeps n over 8 and 12 and k from 1 to 40 in steps of 9, with m = 1 and cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38605 
// For n in {8, 12}, k from 1 to 40 (step 9) and m from 1 to 1,
// runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38624 
// Sweeps k from 1 to 40 in steps of 9 with m = 1, n = 4 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_val)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38639 
// For k from 1 to 40 (step 9), every n from 1 to 4 and m from 1 to 1,
// runs a single iteration with ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38659 
// Sweeps n over [5, 8) and k from 1 to 40 in steps of 9, with m = 1 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38676 
// Sweeps n over 8 and 12 and k from 1 to 40 in steps of 9, with m = 1 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38693 
// For k from 1 to 40 (step 9), every n from 1 to 4 and m from 1 to 1,
// runs a single iteration with cm_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38713 
// Sweeps k from 1 to 40 in steps of 9 with m = 1, n = 4, ks = 3 and a_offset = 43.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(k_val)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38729 
// Same sweep as the a_offset test (k from 1 to 40, step 9; ks = 3; a_offset = 43),
// additionally passing each zero index in [0, 1) to zero_index().
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_val)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38748 
// Single run with m = 1, n = 4, k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38761 
// Single run with m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38774 
// Single run with m = 1, n = 4, k = 8 and cm_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38787 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
38788 
38789 
38790 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = 2, n = 4 and k = 8; no stride or offset overrides.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38802 
// Single run with m = 2, n = 4, k = 8 and cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38815 
// With k fixed at 8, runs one iteration for every n from 1 to 4 and m from 1 to 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38832 
// With n = 4 and k = 8, runs one iteration for every m from 1 to 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38847 
// With m = 2 and k = 8, runs one iteration for every n from 1 to 4.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(n_val).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38862 
// Runs with m = 2, n = 4 for every k from 1 up to (but not including) 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38876 
// For every k in [1, 8), n from 1 to 4 and m from 1 to 2, runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38895 
// Runs with m = 2, n = 4 for every k from 9 up to (but not including) 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38909 
// For every k in [9, 16), n from 1 to 4 and m from 1 to 2, runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38928 
// Runs with m = 2, n = 4 for k from 16 to 80 in steps of 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38942 
// For k from 16 to 80 (step 8), every n from 1 to 4 and m from 1 to 2,
// runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38961 
// Sweeps n over [5, 8) and k from 1 to 40 in steps of 9, with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_val).k(k_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38977 
// Sweeps n over [5, 8) and k from 1 to 40 in steps of 9, with m = 2 and cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38994 
// For every n in [5, 8), k from 1 to 40 (step 9) and m from 1 to 2,
// runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39013 
// Sweeps n over 8 and 12 and k from 1 to 40 in steps of 9, with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_val).k(k_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39029 
// Sweeps n over 8 and 12 and k from 1 to 40 in steps of 9, with m = 2 and cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39046 
// For n in {8, 12}, k from 1 to 40 (step 9) and m from 1 to 2,
// runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39065 
// Sweeps k from 1 to 40 in steps of 9 with m = 2, n = 4 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_val)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
39080 
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with ks set to 3, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39100 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} at m=2 with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39117 
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m=2 with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39134 
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with cm_stride set to 7, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39154 
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with ks set to 3 and a_offset set to 83.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39170 
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over 0..1 with ks set to 3 and a_offset set to 83.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39189 
// Runs the full 2x4 tile at k=8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39202 
// Runs the full 2x4 tile at k=8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39215 
// Runs the full 2x4 tile at k=8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39228 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
39229 
39230 
39231 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the full 2x4 tile at k=8 with all other tester settings left at their defaults.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39243 
// Runs the full 2x4 tile at k=8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39256 
// Sweeps n over 1..4 and m over 1..2 at k=8, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39273 
// Sweeps m over 1..2 at n=4 and k=8, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39288 
// Sweeps n over 1..4 at m=2 and k=8, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39303 
// Sweeps k over 1..7 on the full 2x4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39317 
// Sweeps k over 1..7, n over 1..4 and m over 1..2, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39336 
// Sweeps k over 9..15 on the full 2x4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39350 
// Sweeps k over 9..15, n over 1..4 and m over 1..2, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39369 
// Sweeps k over the multiples of 8 from 16 to 80 on the full 2x4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39383 
// Sweeps k over the multiples of 8 from 16 to 80, n over 1..4 and m over 1..2, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39402 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} at m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39418 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} at m=2 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39435 
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..2, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39454 
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39470 
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m=2 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39487 
// Sweeps n over {8, 12}, k over {1, 10, 19, 28, 37} and m over 1..2, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39506 
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39521 
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with ks set to 3, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39541 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} at m=2 with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39558 
// Sweeps n over {8, 12} and k over {1, 10, 19, 28, 37} at m=2 with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39575 
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..4 and m over 1..2 with cm_stride set to 7, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39595 
// Sweeps k over {1, 10, 19, 28, 37} on the full 2x4 tile with ks set to 3 and a_offset set to 83.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39611 
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over 0..1 with ks set to 3 and a_offset set to 83.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39630 
// Runs the full 2x4 tile at k=8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39643 
// Runs the full 2x4 tile at k=8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39656 
// Runs the full 2x4 tile at k=8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39669 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
39670 
39671 
39672 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the full 3x4 tile at k=8 with all other tester settings left at their defaults.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39684 
// Runs the full 3x4 tile at k=8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
39697 
// Sweeps n over 1..4 and m over 1..3 at k=8, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39714 
// Sweeps m over 1..3 at n=4 and k=8, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39729 
// Sweeps n over 1..4 at m=3 and k=8, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39744 
// Sweeps k over 1..7 on the full 3x4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39758 
// Sweeps k over 1..7, n over 1..4 and m over 1..3, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39777 
// Sweeps k over 9..15 on the full 3x4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39791 
// Sweeps k over 9..15, n over 1..4 and m over 1..3, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39810 
// Sweeps k over the multiples of 8 from 16 to 80 on the full 3x4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
39824 
// Sweeps k over the multiples of 8 from 16 to 80, n over 1..4 and m over 1..3, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39843 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} at m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39859 
// Sweeps n over 5..7 and k over {1, 10, 19, 28, 37} at m=3 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
39876 
// Sweeps n over 5..7, k over {1, 10, 19, 28, 37} and m over 1..3, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39895 
// Uses n = 8 and n = 12 (multiples of the nr(4) setting) with m(3),
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(depth)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39911 
// n = 8 and n = 12 with m(3) and cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39928 
// n = 8 and n = 12, k = 1, 10, 19, 28, 37, and every m from 1 to 3;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39947 
// Full m(3) x n(4) tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
39962 
// ks(3) combined with every m in 1..3 and n in 1..4, for k = 1, 10, 19, 28, 37;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
39982 
// n = 5..7 with m(3) and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
39999 
// n = 8 and n = 12 with m(3) and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40016 
// cm_stride(7) combined with every m in 1..3 and n in 1..4,
// for k = 1, 10, 19, 28, 37; each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40036 
// Full m(3) x n(4) tile with ks(3) and a_offset(127), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(depth)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40052 
// Like the a_offset configuration (ks(3), a_offset(127), full 3x4 tile), but
// additionally passes zero_index values 0, 1, 2 for each k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40071 
// Single configuration: m(3), n(4), k(8) with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40084 
// Single configuration: m(3), n(4), k(8) with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40097 
// Single configuration: m(3), n(4), k(8) with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40110 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
40111 
40112 
40113 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Baseline configuration for the 3x4c2s4 ukernel: m(3), n(4), k(8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40125 
// Single configuration: m(3), n(4), k(8) with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40138 
// k(8) with every n in 1..4 and m in 1..3; each configuration is run with
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40155 
// k(8) and n(4) with m varied over 1..3; each run uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40170 
// k(8) and m(3) with n varied over 1..4; each run uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40185 
// Full m(3) x n(4) tile with k stepping through 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40199 
// k in 1..7 crossed with every n in 1..4 and m in 1..3; each configuration is
// run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40218 
// Full m(3) x n(4) tile with k stepping through 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40232 
// k in 9..15 crossed with every n in 1..4 and m in 1..3; each configuration is
// run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40251 
// Full m(3) x n(4) tile with k = 16, 24, ..., 80 (steps of 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40265 
// k = 16, 24, ..., 80 crossed with every n in 1..4 and m in 1..3; each
// configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40284 
// Sweeps n = 5, 6, 7 (beyond the nr(4) setting) against k = 1, 10, 19, 28, 37
// while m stays at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(depth)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40300 
// n = 5..7 by k = 1, 10, 19, 28, 37 at m(3), with cn_stride(7) applied to
// every configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40317 
// For n = 5..7 and k = 1, 10, 19, 28, 37, tries every m from 1 to 3;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40336 
// Uses n = 8 and n = 12 (multiples of the nr(4) setting) with m(3),
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(depth)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40352 
// n = 8 and n = 12 with m(3) and cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40369 
// n = 8 and n = 12, k = 1, 10, 19, 28, 37, and every m from 1 to 3;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40388 
// Full m(3) x n(4) tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40403 
// ks(3) combined with every m in 1..3 and n in 1..4, for k = 1, 10, 19, 28, 37;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40423 
// n = 5..7 with m(3) and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40440 
// n = 8 and n = 12 with m(3) and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40457 
// cm_stride(7) combined with every m in 1..3 and n in 1..4,
// for k = 1, 10, 19, 28, 37; each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40477 
// Full m(3) x n(4) tile with ks(3) and a_offset(127), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(depth)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40493 
// Like the a_offset configuration (ks(3), a_offset(127), full 3x4 tile), but
// additionally passes zero_index values 0, 1, 2 for each k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40512 
// Single configuration: m(3), n(4), k(8) with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40525 
// Single configuration: m(3), n(4), k(8) with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40538 
// Single configuration: m(3), n(4), k(8) with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40551 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
40552 
40553 
40554 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Baseline configuration for the 3x4c8 ukernel: m(3), n(4), k(8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40566 
// Single configuration: m(3), n(4), k(8) with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40579 
// k(8) with every n in 1..4 and m in 1..3; each configuration is run with
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40596 
// k(8) and n(4) with m varied over 1..3; each run uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40611 
// k(8) and m(3) with n varied over 1..4; each run uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40626 
// Full m(3) x n(4) tile with k stepping through 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40640 
// k in 1..7 crossed with every n in 1..4 and m in 1..3; each configuration is
// run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40659 
// Full m(3) x n(4) tile with k stepping through 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40673 
// k in 9..15 crossed with every n in 1..4 and m in 1..3; each configuration is
// run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40692 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  // Runs the 3x4c8 ukernel with m = 3, n = 4 for k = 16, 24, ..., 80.
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_val);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
40706 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  // Runs the 3x4c8 ukernel for k = 16, 24, ..., 80 crossed with n = 1..4 and
  // m = 1..3, with iterations(1) for each combination.
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
40725 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  // Runs the 3x4c8 ukernel with m = 3 for n = 5..7 crossed with
  // k = 1, 10, 19, 28, 37.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
40741 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  // Runs the 3x4c8 ukernel with m = 3 and cn_stride(7) for n = 5..7 crossed
  // with k = 1, 10, 19, 28, 37.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
40758 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  // Runs the 3x4c8 ukernel for n = 5..7, k = 1, 10, 19, 28, 37 and m = 1..3,
  // with iterations(1) for each combination.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
40777 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  // Runs the 3x4c8 ukernel with m = 3 for n = 8, 12 crossed with
  // k = 1, 10, 19, 28, 37.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
40793 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  // Runs the 3x4c8 ukernel with m = 3 and cn_stride(7) for n = 8, 12 crossed
  // with k = 1, 10, 19, 28, 37.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
40810 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  // Runs the 3x4c8 ukernel for n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1..3,
  // with iterations(1) for each combination.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
40829 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  // Runs the 3x4c8 ukernel with m = 3, n = 4 and ks(3) for
  // k = 1, 10, 19, 28, 37.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_val);
    tester.ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
40844 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  // Runs the 3x4c8 ukernel with ks(3) for k = 1, 10, 19, 28, 37 crossed with
  // n = 1..4 and m = 1..3, with iterations(1) for each combination.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
40864 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  // Runs the 3x4c8 ukernel with m = 3 and ks(3) for n = 5..7 crossed with
  // k = 1, 10, 19, 28, 37.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
40881 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  // Runs the 3x4c8 ukernel with m = 3 and ks(3) for n = 8, 12 crossed with
  // k = 1, 10, 19, 28, 37.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
40898 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  // Runs the 3x4c8 ukernel with cm_stride(7) for k = 1, 10, 19, 28, 37 crossed
  // with n = 1..4 and m = 1..3, with iterations(1) for each combination.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
40918 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  // Runs the 3x4c8 ukernel with m = 3, n = 4, ks(3) and a_offset(127) for
  // k = 1, 10, 19, 28, 37.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_val);
    tester.ks(3).a_offset(127);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
40934 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  // Runs the 3x4c8 ukernel with m = 3, n = 4, ks(3) and a_offset(127) for
  // k = 1, 10, 19, 28, 37 crossed with zero_index = 0..2.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(4).k(k_val);
      tester.ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
40953 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  // Single run of the 3x4c8 ukernel with m = 3, n = 4, k = 8 and qmin(128).
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
    xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
    xnn_qs8_requantize_fp32);
}
40966 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  // Single run of the 3x4c8 ukernel with m = 3, n = 4, k = 8 and qmax(128).
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
    xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
    xnn_qs8_requantize_fp32);
}
40979 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  // Single run of the 3x4c8 ukernel with m = 3, n = 4, k = 8 and cm_stride(7).
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
    xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
    xnn_qs8_requantize_fp32);
}
40992 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
40993 
40994 
40995 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  // Single run of the 4x4c2s4 ukernel with m = 4, n = 4, k = 8.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
    xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
    xnn_qs8_requantize_fp32);
}
41007 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  // Single run of the 4x4c2s4 ukernel with m = 4, n = 4, k = 8 and cn_stride(7).
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
    xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
    xnn_qs8_requantize_fp32);
}
41020 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  // Runs the 4x4c2s4 ukernel with k = 8 for n = 1..4 crossed with m = 1..4,
  // with iterations(1) for each combination.
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(m_val).n(n_val).k(8);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
41037 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  // Runs the 4x4c2s4 ukernel with n = 4, k = 8 for m = 1..4, with
  // iterations(1) for each value.
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(m_val).n(4).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
41052 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  // Runs the 4x4c2s4 ukernel with m = 4, k = 8 for n = 1..4, with
  // iterations(1) for each value.
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(n_val).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
41067 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  // Runs the 4x4c2s4 ukernel with m = 4, n = 4 for k = 1..7.
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
41081 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  // Runs the 4x4c2s4 ukernel for k = 1..7 crossed with n = 1..4 and m = 1..4,
  // with iterations(1) for each combination.
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
41100 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  // Runs the 4x4c2s4 ukernel with m = 4, n = 4 for k = 9..15.
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
41114 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  // Runs the 4x4c2s4 ukernel for k = 9..15 crossed with n = 1..4 and m = 1..4,
  // with iterations(1) for each combination.
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
41133 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  // Runs the 4x4c2s4 ukernel with m = 4, n = 4 for k = 16, 24, ..., 80.
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
41147 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  // Runs the 4x4c2s4 ukernel for k = 16, 24, ..., 80 crossed with n = 1..4 and
  // m = 1..4, with iterations(1) for each combination.
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
41166 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  // Runs the 4x4c2s4 ukernel with m = 4 for n = 5..7 crossed with
  // k = 1, 10, 19, 28, 37.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
41182 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  // Runs the 4x4c2s4 ukernel with m = 4 and cn_stride(7) for n = 5..7 crossed
  // with k = 1, 10, 19, 28, 37.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
41199 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  // Runs the 4x4c2s4 ukernel for n = 5..7, k = 1, 10, 19, 28, 37 and m = 1..4,
  // with iterations(1) for each combination.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
41218 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  // Runs the 4x4c2s4 ukernel with m = 4 for n = 8, 12 crossed with
  // k = 1, 10, 19, 28, 37.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
41234 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  // Runs the 4x4c2s4 ukernel with m = 4 and cn_stride(7) for n = 8, 12 crossed
  // with k = 1, 10, 19, 28, 37.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
41251 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  // Runs the 4x4c2s4 ukernel for n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1..4,
  // with iterations(1) for each combination.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
41270 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel) {
  // Runs the 4x4c2s4 ukernel with m = 4, n = 4 and ks(3) for
  // k = 1, 10, 19, 28, 37.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
41285 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  // Runs the 4x4c2s4 ukernel with ks(3) for k = 1, 10, 19, 28, 37 crossed with
  // n = 1..4 and m = 1..4, with iterations(1) for each combination.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
41305 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  // Runs the 4x4c2s4 ukernel with m = 4 and ks(3) for n = 5..7 crossed with
  // k = 1, 10, 19, 28, 37.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
41322 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  // Runs the 4x4c2s4 ukernel with m = 4 and ks(3) for n = 8, 12 crossed with
  // k = 1, 10, 19, 28, 37.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
41339 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  // Runs the 4x4c2s4 ukernel with cm_stride(7) for k = 1, 10, 19, 28, 37
  // crossed with n = 1..4 and m = 1..4, with iterations(1) for each combination.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
41359 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, a_offset) {
  // Runs the 4x4c2s4 ukernel with m = 4, n = 4, ks(3) and a_offset(163) for
  // k = 1, 10, 19, 28, 37.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_val);
    tester.ks(3).a_offset(163);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
  }
}
41375 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, zero) {
  // Runs the 4x4c2s4 ukernel with m = 4, n = 4, ks(3) and a_offset(163) for
  // k = 1, 10, 19, 28, 37 crossed with zero_index = 0..3.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(4).k(k_val);
      tester.ks(3).a_offset(163).zero_index(zero_idx);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
41394 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  // Single run of the 4x4c2s4 ukernel with m = 4, n = 4, k = 8 and qmin(128).
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
    xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
    xnn_qs8_requantize_fp32);
}
41407 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  // Single run of the 4x4c2s4 ukernel with m = 4, n = 4, k = 8 and qmax(128).
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
    xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
    xnn_qs8_requantize_fp32);
}
41420 
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  // Single run of the 4x4c2s4 ukernel with m = 4, n = 4, k = 8 and cm_stride(7).
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
    xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
    xnn_qs8_requantize_fp32);
}
41433 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
41434 
41435 
41436 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1) {
  // Single run of the 1x4 wasm fmagic ukernel with m = 1, n = 4, k = 1.
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
41448 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cn) {
  // Single run of the 1x4 wasm fmagic ukernel with m = 1, n = 4, k = 1 and
  // cn_stride(7).
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
41461 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile) {
  // Runs the 1x4 wasm fmagic ukernel with k = 1 for n = 1..4 crossed with
  // m = 1 (the m loop has a single value), with iterations(1) each time.
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(m_val).n(n_val).k(1);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
41478 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  // Runs the 1x4 wasm fmagic ukernel with n = 4, k = 1 for m = 1 (the loop has
  // a single value), with iterations(1).
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(m_val).n(4).k(1);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
41493 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  // Runs the 1x4 wasm fmagic ukernel with m = 1, k = 1 for n = 1..4, with
  // iterations(1) for each value.
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(n_val).k(1);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
41508 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1) {
  // Sweeps k over 2..9 with m = 1 and n = 4.
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
41522 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1_subtile) {
  // Sweeps k over 2..9, n over 1..4 and m over 1..1, with iterations(1) per configuration.
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41541 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4) {
  // Sweeps n over 5..7 and k over {1, 3, 5} with m = 1.
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41557 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  // Sweeps n over 5..7 and k over {1, 3, 5} with m = 1 and cn_stride(7).
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41574 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_subtile) {
  // Sweeps n over 5..7, k over {1, 3, 5} and m over 1..1, with iterations(1) per configuration.
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41593 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4) {
  // Sweeps n over {8, 12} and k over {1, 3, 5} with m = 1.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41609 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_strided_cn) {
  // Sweeps n over {8, 12} and k over {1, 3, 5} with m = 1 and cn_stride(7).
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41626 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_subtile) {
  // Sweeps n over {8, 12}, k over {1, 3, 5} and m over 1..1, with iterations(1) per configuration.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41645 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, small_kernel) {
  // Sweeps k over {1, 3, 5} with m = 1, n = 4 and ks(3).
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(k_dim)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
41660 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, small_kernel_subtile) {
  // Sweeps k over {1, 3, 5}, n over 1..4 and m over 1..1 with ks(3) and iterations(1).
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41680 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  // Sweeps n over 5..7 and k over {1, 3, 5} with m = 1 and ks(3).
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41697 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_small_kernel) {
  // Sweeps n over {8, 12} and k over {1, 3, 5} with m = 1 and ks(3).
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41714 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm_subtile) {
  // Sweeps k over {1, 3, 5}, n over 1..4 and m over 1..1 with cm_stride(7) and iterations(1).
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41734 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, a_offset) {
  // Sweeps k over {1, 3, 5} with m = 1, n = 4, ks(3) and a_offset(7).
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(k_dim)
      .ks(3)
      .a_offset(7)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
41750 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, zero) {
  // Sweeps k over {1, 3, 5} and zero_index over 0..0 with ks(3) and a_offset(7).
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .a_offset(7)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41769 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmin) {
  // Single run with m = 1, n = 4, k = 1 and qmin(128).
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
41782 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmax) {
  // Single run with m = 1, n = 4, k = 1 and qmax(128).
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
41795 
TEST(QC8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm) {
  // Single run with m = 1, n = 4, k = 1 and cm_stride(7).
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
41808 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
41809 
41810 
41811 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1) {
  // Single run with m = 3, n = 2, k = 1.
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
41823 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cn) {
  // Single run with m = 3, n = 2, k = 1 and cn_stride(5).
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .cn_stride(5)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
41836 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile) {
  // Sweeps n over 1..2 and m over 1..3 at k = 1, with iterations(1) per configuration.
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41853 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  // Sweeps m over 1..3 with n = 2 and k = 1, with iterations(1) per configuration.
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(m_dim).n(2).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
41868 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  // Sweeps n over 1..2 with m = 3 and k = 1, with iterations(1) per configuration.
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(n_dim).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
41883 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_gt_1) {
  // Sweeps k over 2..9 with m = 3 and n = 2.
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
41897 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_gt_1_subtile) {
  // Sweeps k over 2..9, n over 1..2 and m over 1..3, with iterations(1) per configuration.
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41916 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2) {
  // Sweeps n over 3..3 and k over {1, 3, 5} with m = 3.
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41932 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  // Sweeps n over 3..3 and k over {1, 3, 5} with m = 3 and cn_stride(5).
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(5)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41949 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_subtile) {
  // Sweeps n over 3..3, k over {1, 3, 5} and m over 1..3, with iterations(1) per configuration.
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41968 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2) {
  // Sweeps n over {4, 6} and k over {1, 3, 5} with m = 3.
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41984 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_strided_cn) {
  // Sweeps n over {4, 6} and k over {1, 3, 5} with m = 3 and cn_stride(5).
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(5)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42001 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_subtile) {
  // Sweeps n over {4, 6}, k over {1, 3, 5} and m over 1..3, with iterations(1) per configuration.
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42020 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, small_kernel) {
  // Sweeps k over {1, 3, 5} with m = 3, n = 2 and ks(3).
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_dim)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
42035 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, small_kernel_subtile) {
  // Sweeps k over {1, 3, 5}, n over 1..2 and m over 1..3 with ks(3) and iterations(1).
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42055 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  // Sweeps n over 3..3 and k over {1, 3, 5} with m = 3 and ks(3).
  for (uint32_t n_dim = 3; n_dim <= 3; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42072 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_small_kernel) {
  // Sweeps n over {4, 6} and k over {1, 3, 5} with m = 3 and ks(3).
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42089 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cm_subtile) {
  // Sweeps k over {1, 3, 5}, n over 1..2 and m over 1..3 with cm_stride(5) and iterations(1).
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42109 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, a_offset) {
  // Sweeps k over {1, 3, 5} with m = 3, n = 2, ks(3) and a_offset(17).
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_dim)
      .ks(3)
      .a_offset(17)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
42125 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, zero) {
  // Sweeps k over {1, 3, 5} and zero_index over 0..2 with ks(3) and a_offset(17).
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_dim)
        .ks(3)
        .a_offset(17)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42144 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, qmin) {
  // Single run with m = 3, n = 2, k = 1 and qmin(128).
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
42157 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, qmax) {
  // Single run with m = 3, n = 2, k = 1 and qmax(128).
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
42170 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cm) {
  // Single run with m = 3, n = 2, k = 1 and cm_stride(5).
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .cm_stride(5)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
42183 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
42184 
42185 
42186 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1) {
  // Single run with m = 3, n = 4, k = 1.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(1).sr(1)
    .m(3).n(4).k(1)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
42198 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cn) {
  // Single run with m = 3, n = 4, k = 1 and cn_stride(7).
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(1).sr(1)
    .m(3).n(4).k(1)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
42211 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile) {
  // Sweeps n over 1..4 and m over 1..3 at k = 1, with iterations(1) per configuration.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42228 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  // Sweeps m over 1..3 with n = 4 and k = 1, with iterations(1) per configuration.
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(m_dim).n(4).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
42243 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  // Sweeps n over 1..4 with m = 3 and k = 1, with iterations(1) per configuration.
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(n_dim).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
42258 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_gt_1) {
  // Sweeps k over 2..9 with m = 3 and n = 4.
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
42272 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_gt_1_subtile) {
  // Sweeps k over 2..9, n over 1..4 and m over 1..3, with iterations(1) per configuration.
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42291 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4) {
  // Sweeps n over 5..7 and k over {1, 3, 5} with m = 3.
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42307 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  // Sweeps n over 5..7 and k over {1, 3, 5} with m = 3 and cn_stride(7).
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42324 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_subtile) {
  // Sweeps n over 5..7, k over {1, 3, 5} and m over 1..3, with iterations(1) per configuration.
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42343 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4) {
  // Sweeps n over {8, 12} and k over {1, 3, 5} with m = 3.
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42359 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_strided_cn) {
  // Sweeps n over {8, 12} and k over {1, 3, 5} with m fixed at 3 and cn_stride set to 7.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
42376 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_subtile) {
  // Sweeps n over {8, 12}, k over {1, 3, 5} and m over 1..3; one iteration per configuration.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
                    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
42395 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, small_kernel) {
  // Sweeps k over {1, 3, 5} on the m = 3, n = 4 tile with ks set to 3.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(depth);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
42410 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, small_kernel_subtile) {
  // Sweeps k over {1, 3, 5}, n over 1..4 and m over 1..3 with ks set to 3; one iteration each.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
                    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
42430 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  // Sweeps n over 5..7 and k over {1, 3, 5} with m fixed at 3 and ks set to 3.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
42447 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_small_kernel) {
  // Sweeps n over {8, 12} and k over {1, 3, 5} with m fixed at 3 and ks set to 3.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
42464 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cm_subtile) {
  // Sweeps k over {1, 3, 5}, n over 1..4 and m over 1..3 with cm_stride set to 7; one iteration each.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
                    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
42484 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, a_offset) {
  // Sweeps k over {1, 3, 5} on the m = 3, n = 4 tile with ks set to 3 and a_offset set to 17.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(depth);
    tester.ks(3).a_offset(17);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
42500 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, zero) {
  // Sweeps k over {1, 3, 5} and zero_index over 0..2 with ks set to 3 and a_offset set to 17.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(4).k(depth);
      tester.ks(3).a_offset(17).zero_index(zero_row);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
42519 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, qmin) {
  // Single configuration: m = 3, n = 4, k = 1 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
}
42532 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, qmax) {
  // Single configuration: m = 3, n = 4, k = 1 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
}
42545 
TEST(QC8_IGEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cm) {
  // Single configuration: m = 3, n = 4, k = 1 with cm_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
}
42558 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
42559 
42560 
42561 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1) {
  // Single configuration: m = 4, n = 4, k = 1.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
}
42573 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cn) {
  // Single configuration: m = 4, n = 4, k = 1 with cn_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
}
42586 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile) {
  // Sweeps n over 1..4 and m over 1..4 with k fixed at 1; one iteration per configuration.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(rows).n(cols).k(1);
      tester.iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
42603 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  // Sweeps m over 1..4 with n fixed at 4 and k fixed at 1; one iteration per configuration.
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(rows).n(4).k(1);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
42618 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  // Sweeps n over 1..4 with m fixed at 4 and k fixed at 1; one iteration per configuration.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(cols).k(1);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
42633 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1) {
  // Sweeps k over 2..9 on the m = 4, n = 4 tile.
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
42647 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1_subtile) {
  // Sweeps k over 2..9, n over 1..4 and m over 1..4; one iteration per configuration.
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
42666 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4) {
  // Sweeps n over 5..7 and k over {1, 3, 5} with m fixed at 4.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
42682 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  // Sweeps n over 5..7 and k over {1, 3, 5} with m fixed at 4 and cn_stride set to 7.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
42699 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_subtile) {
  // Sweeps n over 5..7, k over {1, 3, 5} and m over 1..4; one iteration per configuration.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
42718 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4) {
  // Sweeps n over {8, 12} (multiples of nr = 4) and k over {1, 3, 5} with m fixed at 4.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
42734 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_strided_cn) {
  // Sweeps n over {8, 12} and k over {1, 3, 5} with m fixed at 4 and cn_stride set to 7.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
42751 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_subtile) {
  // Sweeps n over {8, 12}, k over {1, 3, 5} and m over 1..4; one iteration per configuration.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
42770 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, small_kernel) {
  // Sweeps k over {1, 3, 5} on the m = 4, n = 4 tile with ks set to 3.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(depth);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
42785 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, small_kernel_subtile) {
  // Sweeps k over {1, 3, 5}, n over 1..4 and m over 1..4 with ks set to 3; one iteration each.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
42805 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  // Sweeps n over 5..7 and k over {1, 3, 5} with m fixed at 4 and ks set to 3.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
42822 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_small_kernel) {
  // Sweeps n over {8, 12} and k over {1, 3, 5} with m fixed at 4 and ks set to 3.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
42839 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm_subtile) {
  // Sweeps k over {1, 3, 5}, n over 1..4 and m over 1..4 with cm_stride set to 7; one iteration each.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
42859 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, a_offset) {
  // Sweeps k over {1, 3, 5} on the m = 4, n = 4 tile with ks set to 3 and a_offset set to 23.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(depth);
    tester.ks(3).a_offset(23);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
  }
}
42875 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, zero) {
  // Sweeps k over {1, 3, 5} and zero_index over 0..3 with ks set to 3 and a_offset set to 23.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(4).k(depth);
      tester.ks(3).a_offset(23).zero_index(zero_row);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
42894 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmin) {
  // Single configuration: m = 4, n = 4, k = 1 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
}
42907 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmax) {
  // Single configuration: m = 4, n = 4, k = 1 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
}
42920 
TEST(QC8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm) {
  // Single configuration: m = 4, n = 4, k = 1 with cm_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
}
42933 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
42934 
42935 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1) {
  // Single configuration: m = 2, n = 2, k = 1.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
}
42947 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cn) {
  // Single configuration: m = 2, n = 2, k = 1 with cn_stride set to 5.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
}
42960 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  // Sweeps n over 1..2 and m over 1..2 with k fixed at 1; one iteration per configuration.
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(rows).n(cols).k(1);
      tester.iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
42977 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  // Sweeps m over 1..2 with n fixed at 2 and k fixed at 1; one iteration per configuration.
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(rows).n(2).k(1);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
42992 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  // Sweeps n over 1..2 with m fixed at 2 and k fixed at 1; one iteration per configuration.
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(cols).k(1);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
43007 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1) {
  // Sweeps k over 2..9 on the m = 2, n = 2 tile.
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(2).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
43021 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  // Sweeps k over 2..9, n over 1..2 and m over 1..2; one iteration per configuration.
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                    xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
43040 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2) {
  // The n loop covers only n = 3 (3 <= n < 4); k sweeps over {1, 3, 5} with m fixed at 2.
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
43056 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  // The n loop covers only n = 3; k sweeps over {1, 3, 5} with m fixed at 2 and cn_stride set to 5.
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(5);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
43073 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  // The n loop covers only n = 3; k sweeps over {1, 3, 5} and m over 1..2; one iteration each.
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                    xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
43092 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2) {
  // Sweeps n over {4, 6} (multiples of nr = 2) and k over {1, 3, 5} with m fixed at 2.
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
43108 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  // Sweeps n over {4, 6} and k over {1, 3, 5} with m fixed at 2 and cn_stride set to 5.
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(5);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
43125 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_subtile) {
  // Sweeps n over {4, 6}, k over {1, 3, 5} and m over 1..2; one iteration per configuration.
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                    xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
43144 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel) {
  // Sweeps k over {1, 3, 5} on the m = 2, n = 2 tile with ks set to 3.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(2).k(depth);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
  }
}
43159 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel_subtile) {
  // Sweeps k over {1, 3, 5}, n over 1..2 and m over 1..2 with ks set to 3; one iteration each.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                    xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
43179 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  // The n loop covers only n = 3; k sweeps over {1, 3, 5} with m fixed at 2 and ks set to 3.
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
43196 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  // Sweeps n over {4, 6} and k over {1, 3, 5} with m fixed at 2 and ks set to 3.
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
                  xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
43213 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm_subtile) {
  // cm_stride = 5; k in {1, 3, 5}; every m in [1, 2] and n in [1, 2]; one iteration per configuration.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.cm_stride(5).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
43233 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, a_offset) {
  // Full 2x2 tile with ks = 3 and a_offset = 13; k in {1, 3, 5}.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(2).k(kc);
    tester.ks(3).a_offset(13);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
  }
}
43249 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, zero) {
  // Full 2x2 tile with ks = 3 and a_offset = 13; zero_index in {0, 1}; k in {1, 3, 5}.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(2).k(kc);
      tester.ks(3).a_offset(13).zero_index(zero_row);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
43268 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmin) {
  // Full 2x2 tile, k = 1, with qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
43281 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmax) {
  // Full 2x2 tile, k = 1, with qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
43294 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm) {
  // Full 2x2 tile, k = 1, with cm_stride set to 5.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
    xnn_qs8_requantize_fp32);
}
43307 
43308 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
  // Full 2x2 tile (m = 2, n = 2) with k = 1.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
    xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
43320 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
  // Full 2x2 tile, k = 1, with cn_stride set to 5.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
    xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
43333 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
  // k = 1; every m in [1, 2] and n in [1, 2]; one iteration per configuration.
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(rows).n(cols).k(1);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
43350 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  // k = 1, n = 2; m swept over [1, 2]; one iteration per configuration.
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(rows).n(2).k(1);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
43365 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  // k = 1, m = 2; n swept over [1, 2]; one iteration per configuration.
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(cols).k(1);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
43380 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
  // Full 2x2 tile with k swept over [2, 9].
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(2).k(kc);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
43394 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
  // k in [2, 9]; every m in [1, 2] and n in [1, 2]; one iteration per configuration.
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
43413 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
  // n = 3 (the only value in [3, 4)); m = 2; k in {1, 3, 5}.
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(cols).k(kc);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
43429 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  // cn_stride = 5; n = 3 (the only value in [3, 4)); m = 2; k in {1, 3, 5}.
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(cols).k(kc);
      tester.cn_stride(5);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
43446 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
  // n = 3 (the only value in [3, 4)); k in {1, 3, 5}; m in [1, 2]; one iteration per configuration.
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
43465 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
  // n in {4, 6}; m = 2; k in {1, 3, 5}.
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(cols).k(kc);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
43481 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  // cn_stride = 5; n in {4, 6}; m = 2; k in {1, 3, 5}.
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(cols).k(kc);
      tester.cn_stride(5);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
43498 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
  // n in {4, 6}; k in {1, 3, 5}; m in [1, 2]; one iteration per configuration.
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
43517 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel) {
  // Full 2x2 tile with ks = 3; k in {1, 3, 5}.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(2).k(kc);
    tester.ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
43532 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, small_kernel_subtile) {
  // ks = 3; k in {1, 3, 5}; every m in [1, 2] and n in [1, 2]; one iteration per configuration.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
43552 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  // ks = 3; n = 3 (the only value in [3, 4)); m = 2; k in {1, 3, 5}.
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(cols).k(kc);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
43569 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  // ks = 3; n in {4, 6}; m = 2; k in {1, 3, 5}.
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(cols).k(kc);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
43586 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
  // cm_stride = 5; k in {1, 3, 5}; every m in [1, 2] and n in [1, 2]; one iteration per configuration.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.cm_stride(5).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
43606 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, a_offset) {
  // Full 2x2 tile with ks = 3 and a_offset = 13; k in {1, 3, 5}.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(2).k(kc);
    tester.ks(3).a_offset(13);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
43622 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, zero) {
  // Full 2x2 tile with ks = 3 and a_offset = 13; zero_index in {0, 1}; k in {1, 3, 5}.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(2).k(kc);
      tester.ks(3).a_offset(13).zero_index(zero_row);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
43641 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
  // Full 2x2 tile, k = 1, with qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
    xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
43654 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
  // Full 2x2 tile, k = 1, with qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
    xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
43667 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
  // Full 2x2 tile, k = 1, with cm_stride set to 5.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
    xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
43680 
43681 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1) {
  // Full 2x4 tile (m = 2, n = 4) with k = 1.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
43693 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cn) {
  // Full 2x4 tile, k = 1, with cn_stride set to 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
43706 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  // k = 1; every m in [1, 2] and n in [1, 4]; one iteration per configuration.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(rows).n(cols).k(1);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
43723 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  // k = 1, n = 4; m swept over [1, 2]; one iteration per configuration.
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(rows).n(4).k(1);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
43738 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  // k = 1, m = 2; n swept over [1, 4]; one iteration per configuration.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(cols).k(1);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
43753 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1) {
  // Full 2x4 tile with k swept over [2, 9].
  for (size_t kc = 2; kc < 10; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
43767 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  // k in [2, 9]; every m in [1, 2] and n in [1, 4]; one iteration per configuration.
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
43786 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4) {
  // n in [5, 7]; m = 2; k in {1, 3, 5}.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(kc);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
43802 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  // cn_stride = 7; n in [5, 7]; m = 2; k in {1, 3, 5}.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(kc);
      tester.cn_stride(7);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
43819 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  // n in [5, 7]; k in {1, 3, 5}; m in [1, 2]; one iteration per configuration.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
43838 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4) {
  // n in {8, 12}; m = 2; k in {1, 3, 5}.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(kc);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
43854 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  // cn_stride = 7; n in {8, 12}; m = 2; k in {1, 3, 5}.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(kc);
      tester.cn_stride(7);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
43871 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_subtile) {
  // n in {8, 12}; k in {1, 3, 5}; m in [1, 2]; one iteration per configuration.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
43890 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, small_kernel) {
  // Full 2x4 tile with ks = 3; k in {1, 3, 5}.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
43905 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, small_kernel_subtile) {
  // ks = 3; k in {1, 3, 5}; every m in [1, 2] and n in [1, 4]; one iteration per configuration.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
43925 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  // ks = 3; n in [5, 7]; m = 2; k in {1, 3, 5}.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(kc);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
43942 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  // ks = 3; n in {8, 12}; m = 2; k in {1, 3, 5}.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(cols).k(kc);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
43959 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm_subtile) {
  // cm_stride = 7; k in {1, 3, 5}; every m in [1, 2] and n in [1, 4]; one iteration per configuration.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.cm_stride(7).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
43979 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, a_offset) {
  // Full 2x4 tile with ks = 3 and a_offset = 13; k in {1, 3, 5}.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(kc);
    tester.ks(3).a_offset(13);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
43995 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, zero) {
  // Full 2x4 tile with ks = 3 and a_offset = 13; zero_index in {0, 1}; k in {1, 3, 5}.
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(4).k(kc);
      tester.ks(3).a_offset(13).zero_index(zero_row);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
44014 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmin) {
  // Full 2x4 tile, k = 1, with qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
44027 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, qmax) {
  // Full 2x4 tile, k = 1, with qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
44040 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_FMAGIC, strided_cm) {
  // Full 2x4 tile, k = 1, with cm_stride set to 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
44053 
44054 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1) {
  // Full 2x4 tile (m = 2, n = 4) with k = 1.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
    xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
44066 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cn) {
  // Full 2x4 tile, k = 1, with cn_stride set to 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
    xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
44079 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile) {
  // k = 1; every m in [1, 2] and n in [1, 4]; one iteration per configuration.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(rows).n(cols).k(1);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
44096 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  // k = 1, n = 4; m swept over [1, 2]; one iteration per configuration.
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(rows).n(4).k(1);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
44111 
// 2x4 scalar-lrintf kernel: k=1, m=2, sweeping n over 1..4 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_size).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
44126 
// 2x4 scalar-lrintf kernel: full tile (m=2, n=4), sweeping k over 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
44140 
// 2x4 scalar-lrintf kernel: k in 2..9 crossed with n in 1..4 and m in 1..2,
// each configuration run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44159 
// 2x4 scalar-lrintf kernel: m=2 with n in 5..7 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44175 
// 2x4 scalar-lrintf kernel: m=2 with n in 5..7 and k in {1, 3, 5}, using
// cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44192 
// 2x4 scalar-lrintf kernel: n in 5..7, k in {1, 3, 5}, m in 1..2, each
// configuration run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44211 
// 2x4 scalar-lrintf kernel: m=2 with n in {8, 12} and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44227 
// 2x4 scalar-lrintf kernel: m=2 with n in {8, 12} and k in {1, 3, 5}, using
// cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44244 
// 2x4 scalar-lrintf kernel: n in {8, 12}, k in {1, 3, 5}, m in 1..2, each
// configuration run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44263 
// 2x4 scalar-lrintf kernel: full tile (m=2, n=4) with ks(3), sweeping k over
// {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
44278 
// 2x4 scalar-lrintf kernel: ks(3) with k in {1, 3, 5}, n in 1..4, m in 1..2,
// each configuration run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44298 
// 2x4 scalar-lrintf kernel: m=2, ks(3), with n in 5..7 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44315 
// 2x4 scalar-lrintf kernel: m=2, ks(3), with n in {8, 12} and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44332 
// 2x4 scalar-lrintf kernel: cm_stride(7) with k in {1, 3, 5}, n in 1..4,
// m in 1..2, each configuration run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44352 
// 2x4 scalar-lrintf kernel: full tile (m=2, n=4), ks(3) and a_offset(13),
// sweeping k over {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(13)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
44368 
// 2x4 scalar-lrintf kernel: full tile (m=2, n=4), ks(3) and a_offset(13), with
// k in {1, 3, 5} and zero_index swept over 0..1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(4).k(k_size)
          .ks(3)
          .a_offset(13)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44387 
// 2x4 scalar-lrintf kernel: full tile (m=2, n=4), k=1, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
44400 
// 2x4 scalar-lrintf kernel: full tile (m=2, n=4), k=1, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
44413 
// 2x4 scalar-lrintf kernel: full tile (m=2, n=4), k=1, with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cm_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
44426 
44427 
// 3x4 scalar-lrintf kernel: full tile (m=3, n=4) with k=1; no other tester
// setters are called.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
44439 
// 3x4 scalar-lrintf kernel: full tile (m=3, n=4), k=1, with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cn_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
44452 
// 3x4 scalar-lrintf kernel: k=1 for every (m, n) with n in 1..4 and m in 1..3,
// each configuration run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44469 
// 3x4 scalar-lrintf kernel: k=1, n=4, sweeping m over 1..3 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(m_size).n(4).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
44484 
// 3x4 scalar-lrintf kernel: k=1, m=3, sweeping n over 1..4 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(n_size).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
44499 
// 3x4 scalar-lrintf kernel: full tile (m=3, n=4), sweeping k over 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
44513 
// 3x4 scalar-lrintf kernel: k in 2..9 crossed with n in 1..4 and m in 1..3,
// each configuration run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44532 
// 3x4 scalar-lrintf kernel: m=3 with n in 5..7 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44548 
// 3x4 scalar-lrintf kernel: m=3 with n in 5..7 and k in {1, 3, 5}, using
// cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44565 
// 3x4 scalar-lrintf kernel: n in 5..7, k in {1, 3, 5}, m in 1..3, each
// configuration run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44584 
// 3x4 scalar-lrintf kernel: m=3 with n in {8, 12} and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44600 
// 3x4 scalar-lrintf kernel: m=3 with n in {8, 12} and k in {1, 3, 5}, using
// cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44617 
// 3x4 scalar-lrintf kernel: n in {8, 12}, k in {1, 3, 5}, m in 1..3, each
// configuration run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44636 
// 3x4 scalar-lrintf kernel: full tile (m=3, n=4) with ks(3), sweeping k over
// {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
44651 
// 3x4 scalar-lrintf kernel: ks(3) with k in {1, 3, 5}, n in 1..4, m in 1..3,
// each configuration run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44671 
// 3x4 scalar-lrintf kernel: m=3, ks(3), with n in 5..7 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44688 
// 3x4 scalar-lrintf kernel: m=3, ks(3), with n in {8, 12} and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44705 
// 3x4 scalar-lrintf kernel: cm_stride(7) with k in {1, 3, 5}, n in 1..4,
// m in 1..3, each configuration run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44725 
// 3x4 scalar-lrintf kernel: full tile (m=3, n=4), ks(3) and a_offset(17),
// sweeping k over {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(17)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
44741 
// 3x4 scalar-lrintf kernel: full tile (m=3, n=4), ks(3) and a_offset(17), with
// k in {1, 3, 5} and zero_index swept over 0..2.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(4).k(k_size)
          .ks(3)
          .a_offset(17)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44760 
// 3x4 scalar-lrintf kernel: full tile (m=3, n=4), k=1, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
44773 
// 3x4 scalar-lrintf kernel: full tile (m=3, n=4), k=1, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
44786 
// 3x4 scalar-lrintf kernel: full tile (m=3, n=4), k=1, with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cm_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
44799 
44800 
// 4x2 scalar-fmagic kernel: full tile (m=4, n=2) with k=1; no other tester
// setters are called.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
44812 
// 4x2 scalar-fmagic kernel: full tile (m=4, n=2), k=1, with cn_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
44825 
// 4x2 scalar-fmagic kernel: k=1 for every (m, n) with n in 1..2 and m in 1..4,
// each configuration run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44842 
// 4x2 scalar-fmagic kernel: k=1, n=2, sweeping m over 1..4 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(m_size).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
44857 
// 4x2 scalar-fmagic kernel: k=1, m=4, sweeping n over 1..2 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_size).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
44872 
// 4x2 scalar-fmagic kernel: full tile (m=4, n=2), sweeping k over 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
44886 
// 4x2 scalar-fmagic kernel: k in 2..9 crossed with n in 1..2 and m in 1..4,
// each configuration run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44905 
// 4x2 scalar-fmagic kernel: m=4 with n=3 (the only value in the n loop) and
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44921 
// 4x2 scalar-fmagic kernel: m=4 with n=3 (the only value in the n loop) and
// k in {1, 3, 5}, using cn_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(5)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44938 
// 4x2 scalar-fmagic kernel: n=3 (the only value in the n loop), k in
// {1, 3, 5}, m in 1..4, each configuration run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
44957 
// 4x2 scalar-fmagic kernel: m=4 with n in {4, 6} and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44973 
// 4x2 scalar-fmagic kernel: m=4 with n in {4, 6} and k in {1, 3, 5}, using
// cn_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(5)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
44990 
// Runs the 4x2 scalar fmagic kernel with n in {4, 6}, k in {1, 3, 5} and
// every m in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45009 
// Runs the 4x2 scalar fmagic kernel on a full 4x2 tile with ks = 3 for
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45024 
// Runs the 4x2 scalar fmagic kernel with ks = 3 over k in {1, 3, 5},
// n in 1..2 and m in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45044 
// Runs the 4x2 scalar fmagic kernel with m = 4, n = 3, ks = 3 and
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45061 
// Runs the 4x2 scalar fmagic kernel with m = 4, n in {4, 6}, ks = 3 and
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45078 
// Runs the 4x2 scalar fmagic kernel with cm_stride = 5 over k in {1, 3, 5},
// n in 1..2 and m in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(5).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45098 
// Runs the 4x2 scalar fmagic kernel on a full 4x2 tile with ks = 3 and
// a_offset = 23 for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(k_size);
    tester.ks(3).a_offset(23);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45114 
// Runs the 4x2 scalar fmagic kernel on a full 4x2 tile with ks = 3 and
// a_offset = 23, trying each zero_index in 0..3 for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(2).k(k_size);
      tester.ks(3).a_offset(23).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45133 
// Runs the 4x2 scalar fmagic kernel on a full 4x2 tile with k = 1 and
// qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45146 
// Runs the 4x2 scalar fmagic kernel on a full 4x2 tile with k = 1 and
// qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45159 
// Runs the 4x2 scalar fmagic kernel on a full 4x2 tile with k = 1 and
// cm_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45172 
45173 
// Runs the 4x4 scalar imagic kernel on a full 4x4 tile with k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
45185 
// Runs the 4x4 scalar imagic kernel on a full 4x4 tile with k = 1 and
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
45198 
// Runs the 4x4 scalar imagic kernel with k = 1 over every n in 1..4 and
// m in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45215 
// Runs the 4x4 scalar imagic kernel with n = 4 and k = 1 over every
// m in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(m_size).n(4).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45230 
// Runs the 4x4 scalar imagic kernel with m = 4 and k = 1 over every
// n in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(n_size).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45245 
// Runs the 4x4 scalar imagic kernel on a full 4x4 tile for every k in 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45259 
// Runs the 4x4 scalar imagic kernel over k in 2..9, n in 1..4 and m in 1..4,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45278 
// Runs the 4x4 scalar imagic kernel with m = 4, n in 5..7 and
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45294 
// Same sweep as n_gt_4 (m = 4, n in 5..7, k in {1, 3, 5}) but with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45311 
// Runs the 4x4 scalar imagic kernel with n in 5..7, k in {1, 3, 5} and
// every m in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45330 
// Runs the 4x4 scalar imagic kernel with m = 4, n in {8, 12} (multiples of
// nr = 4) and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45346 
// Same sweep as n_div_4 (m = 4, n in {8, 12}, k in {1, 3, 5}) but with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45363 
// Runs the 4x4 scalar imagic kernel with n in {8, 12}, k in {1, 3, 5} and
// every m in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45382 
// Runs the 4x4 scalar imagic kernel on a full 4x4 tile with ks = 3 for
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45397 
// Runs the 4x4 scalar imagic kernel with ks = 3 over k in {1, 3, 5},
// n in 1..4 and m in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45417 
// Runs the 4x4 scalar imagic kernel with m = 4, n in 5..7, ks = 3 and
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45434 
// Runs the 4x4 scalar imagic kernel with m = 4, n in {8, 12}, ks = 3 and
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45451 
// Runs the 4x4 scalar imagic kernel with cm_stride = 7 over k in {1, 3, 5},
// n in 1..4 and m in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45471 
// Runs the 4x4 scalar imagic kernel on a full 4x4 tile with ks = 3 and
// a_offset = 23 for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.ks(3).a_offset(23);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45487 
// Runs the 4x4 scalar imagic kernel on a full 4x4 tile with ks = 3 and
// a_offset = 23, trying each zero_index in 0..3 for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(4).k(k_size);
      tester.ks(3).a_offset(23).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45506 
// Runs the 4x4 scalar imagic kernel on a full 4x4 tile with k = 1 and
// qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
45519 
// Runs the 4x4 scalar imagic kernel on a full 4x4 tile with k = 1 and
// qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
45532 
// Runs the 4x4 scalar imagic kernel on a full 4x4 tile with k = 1 and
// cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
45545 
45546 
// Runs the 4x4 scalar lrintf kernel on a full 4x4 tile with k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
45558 
// Runs the 4x4 scalar lrintf kernel on a full 4x4 tile with k = 1 and
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
45571 
// Runs the 4x4 scalar lrintf kernel with k = 1 over every n in 1..4 and
// m in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45588 
// Runs the 4x4 scalar lrintf kernel with n = 4 and k = 1 over every
// m in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(m_size).n(4).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
45603 
// Runs the 4x4 scalar lrintf kernel with m = 4 and k = 1 over every
// n in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(n_size).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
45618 
// Runs the 4x4 scalar lrintf kernel on a full 4x4 tile for every k in 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
45632 
// Runs the 4x4 scalar lrintf kernel over k in 2..9, n in 1..4 and m in 1..4,
// using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45651 
// Runs the 4x4 scalar lrintf kernel with m = 4, n in 5..7 and
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45667 
// Same sweep as n_gt_4 (m = 4, n in 5..7, k in {1, 3, 5}) but with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45684 
// Runs the 4x4 scalar lrintf kernel with n in 5..7, k in {1, 3, 5} and
// every m in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45703 
// Runs the 4x4 scalar lrintf kernel with m = 4, n in {8, 12} (multiples of
// nr = 4) and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45719 
// Same sweep as n_div_4 (m = 4, n in {8, 12}, k in {1, 3, 5}) but with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45736 
// Runs the 4x4 scalar lrintf kernel with n in {8, 12}, k in {1, 3, 5} and
// every m in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45755 
// Runs the 4x4 scalar lrintf kernel on a full 4x4 tile with ks = 3 for
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
45770 
// Runs the 4x4 scalar lrintf kernel with ks = 3 over k in {1, 3, 5},
// n in 1..4 and m in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45790 
// Runs the 4x4 scalar lrintf kernel with m = 4, n in 5..7, ks = 3 and
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45807 
// Runs the 4x4 scalar lrintf kernel with m = 4, n in {8, 12}, ks = 3 and
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45824 
// Runs the 4x4 scalar lrintf kernel with cm_stride = 7 over k in {1, 3, 5},
// n in 1..4 and m in 1..4, using a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45844 
// Runs the 4x4 scalar lrintf kernel on a full 4x4 tile with ks = 3 and
// a_offset = 23 for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.ks(3).a_offset(23);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
45860 
// Runs the 4x4 scalar lrintf kernel on a full 4x4 tile with ks = 3 and
// a_offset = 23, trying each zero_index in 0..3 for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(4).k(k_size);
      tester.ks(3).a_offset(23).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45879 
// Runs the full 4x4 tile with k = 1 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
45892 
// Runs the full 4x4 tile with k = 1 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
45905 
// Runs the full 4x4 tile with k = 1 and cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
45918 
45919 
45920 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
// Full 4x8 tile with k = 8, passed through the aarch32 NEONv8 mlal-lane
// prfm ld64 generator. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
45933 
// Full 4x8 tile with k = 8 and cn_stride set to 11.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
45947 
// Every m x n subtile (m in 1..4, n in 1..8) with k = 8, one iteration per
// configuration. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
45965 
// Sweeps m over 1..4 with n = 8 and k = 8, one iteration per configuration.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
45981 
// Sweeps n over 1..8 with m = 4 and k = 8, one iteration per configuration.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
45997 
// Full 4x8 tile for every k in 1..7 (below 8).
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
46012 
// Every m x n subtile (m in 1..4, n in 1..8) for every k in 1..7, one
// iteration per configuration. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
46032 
// Full 4x8 tile for every k in 9..15 (above 8, below 16).
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
46047 
// Every m x n subtile (m in 1..4, n in 1..8) for every k in 9..15, one
// iteration per configuration. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
46067 
// Full 4x8 tile for k = 16, 24, ..., 80 (multiples of 8).
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
46082 
// Every m x n subtile (m in 1..4, n in 1..8) for k = 16, 24, ..., 80, one
// iteration per configuration. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
46102 
// m = 4 with n in 9..15 (wider than nr = 8) and k in {1, 10, 19, 28, 37}.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
46119 
// m = 4 with n in 9..15 and k in {1, 10, 19, 28, 37}, with cn_stride set
// to 11. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
46137 
// n in 9..15, k in {1, 10, 19, 28, 37} and m in 1..4, one iteration per
// configuration. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
46157 
// m = 4 with n in {16, 24} (multiples of nr = 8) and k in {1, 10, 19, 28, 37}.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
46174 
// m = 4 with n in {16, 24} and k in {1, 10, 19, 28, 37}, with cn_stride set
// to 11. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
46192 
// n in {16, 24}, k in {1, 10, 19, 28, 37} and m in 1..4, one iteration per
// configuration. Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
46212 
// Full 4x8 tile with ks = 3 for k in {1, 10, 19, 28, 37}.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
46228 
// Every m x n subtile (m in 1..4, n in 1..8) with ks = 3 for
// k in {1, 10, 19, 28, 37}, one iteration per configuration.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
46249 
// m = 4 with n in 9..15, k in {1, 10, 19, 28, 37} and ks = 3.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
46267 
// m = 4 with n in {16, 24}, k in {1, 10, 19, 28, 37} and ks = 3.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
46285 
// Every m x n subtile (m in 1..4, n in 1..8) for k in {1, 10, 19, 28, 37}
// with cm_stride set to 11, one iteration per configuration.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
46306 
// Full 4x8 tile with ks = 3 and a_offset = 163 for k in {1, 10, 19, 28, 37}.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
46323 
// Same setup as the a_offset case (ks = 3, a_offset = 163,
// k in {1, 10, 19, 28, 37}), additionally sweeping zero_index over 0..3.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
46343 
// Full 4x8 tile with k = 8 and qmin set to 128.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
46357 
// Full 4x8 tile with k = 8 and qmax set to 128.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
46371 
// Full 4x8 tile with k = 8 and cm_stride set to 11.
// Gated by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
46385 #endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
46386