xref: /aosp_15_r20/external/XNNPACK/test/qc8-igemm-minmax-fp32.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 //   Specification: test/qc8-igemm-minmax-fp32.yaml
11 //   Generator: tools/generate-gemm-test.py
12 
13 
14 #include <gtest/gtest.h>
15 
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20 
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25 
26 
27 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single run with m = 1, n = 8, k = 8 on the mr = 1, nr = 8 configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
40 
// Single run with m = 1, n = 8, k = 8 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cn_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
54 
// k fixed at 8; sweeps n over 1..8 and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
72 
// n = 8 and k = 8 fixed; sweeps m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_size).n(8).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
88 
// m = 1 and k = 8 fixed; sweeps n over 1..8, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
104 
// m = 1 and n = 8 fixed; sweeps k over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
119 
// Sweeps k over 1..7, n over 1..8 and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
139 
// m = 1 and n = 8 fixed; sweeps k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
154 
// Sweeps k over 9..15, n over 1..8 and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
174 
// m = 1 and n = 8 fixed; sweeps k over 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
189 
// Sweeps k over 16..80 (step 8), n over 1..8 and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
209 
// m = 1 fixed; sweeps n over 9..15 and k over 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
226 
// m = 1 fixed, cn_stride set to 11; sweeps n over 9..15 and k over 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
244 
// Sweeps n over 9..15, k over 1..37 (step 9) and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
264 
// m = 1 fixed; sweeps n over 16 and 24, and k over 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
281 
// m = 1 fixed, cn_stride set to 11; sweeps n over 16 and 24, and k over 1..37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
299 
// Sweeps n over 16 and 24, k over 1..37 (step 9) and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
319 
// m = 1, n = 8 and ks = 3 fixed; sweeps k over 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
335 
// ks = 3 fixed; sweeps k over 1..37 (step 9), n over 1..8 and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
356 
// m = 1 and ks = 3 fixed; sweeps n over 9..15 and k over 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
374 
// m = 1 and ks = 3 fixed; sweeps n over 16 and 24, and k over 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
392 
// cm_stride set to 11; sweeps k over 1..37 (step 9), n over 1..8 and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
413 
// m = 1, n = 8, ks = 3 and a_offset = 43 fixed; sweeps k over 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(43)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
430 
// ks = 3 and a_offset = 43 fixed; sweeps k over 1..37 (step 9) and zero_index over 0..0.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(8).k(k_size)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
450 
// Single run with m = 1, n = 8, k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
464 
// Single run with m = 1, n = 8, k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
478 
// Single run with m = 1, n = 8, k = 8 and cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
492 #endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
493 
494 
495 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single run with m = 1, n = 8, k = 8 on the mr = 1, nr = 8 configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
508 
// Single run with m = 1, n = 8, k = 8 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(8)
      .cn_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
522 
// k fixed at 8; sweeps n over 1..8 and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
540 
// n = 8 and k = 8 fixed; sweeps m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_size).n(8).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
556 
// m = 1 and k = 8 fixed; sweeps n over 1..8, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
572 
// m = 1 and n = 8 fixed; sweeps k over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
587 
// Sweeps k over 1..7, n over 1..8 and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
607 
// m = 1 and n = 8 fixed; sweeps k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
622 
// Sweeps k over 9..15, n over 1..8 and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
642 
// m = 1 and n = 8 fixed; sweeps k over 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
657 
// Sweeps k over 16..80 (step 8), n over 1..8 and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
677 
// m = 1 fixed; sweeps n over 9..15 and k over 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
694 
// m = 1 fixed, cn_stride set to 11; sweeps n over 9..15 and k over 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
712 
// Sweeps n over 9..15, k over 1..37 (step 9) and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
732 
// m = 1 fixed; sweeps n over 16 and 24, and k over 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
749 
// m = 1 fixed, cn_stride set to 11; sweeps n over 16 and 24, and k over 1..37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
767 
// Sweeps n over 16 and 24, k over 1..37 (step 9) and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
787 
// m = 1, n = 8 and ks = 3 fixed; sweeps k over 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
803 
// ks = 3 fixed; sweeps k over 1..37 (step 9), n over 1..8 and m over 1..1, with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
824 
// ks(3) with n in 9..15 and k in {1, 10, 19, 28, 37}; m is fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
842 
// ks(3) with n in {16, 24} and k in {1, 10, 19, 28, 37}; m is fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
860 
// cm_stride(11) with every (m, n) in 1..1 x 1..8 for k in {1, 10, 19, 28, 37};
// each combination runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
881 
// Full 1x8 tile with ks(3) and a_offset(43); k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
898 
// Same configuration as the a_offset case (ks(3), a_offset(43)) plus
// zero_index set to each value in 0..0, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zi)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
918 
// Single 1x8 tile at k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
932 
// Single 1x8 tile at k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
946 
// Single 1x8 tile at k = 8 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A35, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__aarch32_neonv8_mlal_lane_prfm_cortex_a35,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
960 #endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
961 
962 
963 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single full 4x8 tile at k = 8 with otherwise default tester settings.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
976 
// Single full 4x8 tile at k = 8 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
990 
// k = 8 with every (n, m) in 1..8 x 1..4; each combination runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1008 
// k = 8, n = 8, with m stepping through 1..4; iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1024 
// k = 8, m = 4, with n stepping through 1..8; iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1040 
// Full 4x8 tile with k stepping through 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1055 
// k in 1..7 crossed with every (n, m) in 1..8 x 1..4; iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1075 
// Full 4x8 tile with k stepping through 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1090 
// k in 9..15 crossed with every (n, m) in 1..8 x 1..4; iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1110 
// Full 4x8 tile with k taking every multiple of 8 from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1125 
// k in {16, 24, ..., 80} crossed with every (n, m) in 1..8 x 1..4;
// iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1145 
// m = 4 with n in 9..15 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1162 
// m = 4 with n in 9..15, k in {1, 10, 19, 28, 37} and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1180 
// n in 9..15, k in {1, 10, 19, 28, 37} and m in 1..4; iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1200 
// m = 4 with n in {16, 24} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1217 
// m = 4 with n in {16, 24}, k in {1, 10, 19, 28, 37} and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1235 
// n in {16, 24}, k in {1, 10, 19, 28, 37} and m in 1..4; iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1255 
// Full 4x8 tile with ks(3); k takes the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1271 
// ks(3) with k in {1, 10, 19, 28, 37} crossed with every (n, m) in 1..8 x 1..4;
// iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1292 
// ks(3) with m = 4, n in 9..15 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1310 
// ks(3) with m = 4, n in {16, 24} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1328 
// cm_stride(11) with k in {1, 10, 19, 28, 37} crossed with every (n, m) in
// 1..8 x 1..4; iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1349 
// Full 4x8 tile with ks(3) and a_offset(163); k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1366 
// Same configuration as the a_offset case (ks(3), a_offset(163)) plus
// zero_index set to each value in 0..3, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1386 
// Single full 4x8 tile at k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1400 
// Single full 4x8 tile at k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1414 
// Single full 4x8 tile at k = 8 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1428 #endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1429 
1430 
1431 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single full 4x8 tile at k = 8 with otherwise default tester settings.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1444 
// Single full 4x8 tile at k = 8 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
1458 
// k = 8 with every (n, m) in 1..8 x 1..4; each combination runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1476 
// k = 8, n = 8, with m stepping through 1..4; iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1492 
// k = 8, m = 4, with n stepping through 1..8; iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1508 
// Full 4x8 tile with k stepping through 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1523 
// k in 1..7 crossed with every (n, m) in 1..8 x 1..4; iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1543 
// Full 4x8 tile with k stepping through 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1558 
// k in 9..15 crossed with every (n, m) in 1..8 x 1..4; iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1578 
// Full 4x8 tile with k taking every multiple of 8 from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
1593 
// k in {16, 24, ..., 80} crossed with every (n, m) in 1..8 x 1..4;
// iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1613 
// m = 4 with n in 9..15 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1630 
// n from 9 through 15 with m=4, k in {1, 10, 19, 28, 37}, and cn_stride set
// to 11 instead of the tester's default.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
1648 
// n from 9 through 15, k in {1, 10, 19, 28, 37}, and every row count
// 1 <= m <= 4; each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
1668 
// n in {16, 24} (multiples of nr=8) with m=4 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
1685 
// n in {16, 24} with m=4, k in {1, 10, 19, 28, 37}, and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
1703 
// n in {16, 24}, k in {1, 10, 19, 28, 37}, and every row count 1 <= m <= 4;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
1723 
// Full tile (m=4, n=8) with ks set to 3 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
1739 
// ks set to 3, k in {1, 10, 19, 28, 37}, for every sub-tile 1 <= m <= 4,
// 1 <= n <= 8; each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
1760 
// n from 9 through 15 with m=4, ks set to 3, and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
1778 
// n in {16, 24} with m=4, ks set to 3, and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
1796 
// cm_stride set to 11, k in {1, 10, 19, 28, 37}, for every sub-tile
// 1 <= m <= 4, 1 <= n <= 8; each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
1817 
// Full tile (m=4, n=8) with ks set to 3, a_offset set to 163, and
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
1834 
// Full tile (m=4, n=8) with ks set to 3 and a_offset set to 163, repeated for
// each zero_index mz in 0..3 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
1854 
// Full tile (m=4, n=8, k=8) with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
1868 
// Full tile (m=4, n=8, k=8) with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
1882 
// Full tile (m=4, n=8, k=8) with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
1896 #endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1897 
1898 
1899 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
// Full tile (m=4, n=8) with k=8 and otherwise default tester settings.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
1912 
// Full tile (m=4, n=8, k=8) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
1926 
// k=8 for every sub-tile 1 <= m <= 4, 1 <= n <= 8; each case is configured
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
1944 
// k=8 and n=8 with the row count m swept from 1 through 4; each case is
// configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
1960 
// k=8 and m=4 with the column count n swept from 1 through 8; each case is
// configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
1976 
// Full tile (m=4, n=8) with k swept from 1 through 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
1991 
// k swept from 1 through 7 for every sub-tile 1 <= m <= 4, 1 <= n <= 8;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2011 
// Full tile (m=4, n=8) with k swept from 9 through 15.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2026 
// k swept from 9 through 15 for every sub-tile 1 <= m <= 4, 1 <= n <= 8;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2046 
// Full tile (m=4, n=8) with k swept over the multiples of 8 from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2061 
// k swept over the multiples of 8 from 16 through 80, for every sub-tile
// 1 <= m <= 4, 1 <= n <= 8; each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2081 
// n from 9 through 15 (larger than nr=8, not a multiple of it) with m=4 and
// k taking the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2098 
// n from 9 through 15 with m=4, k in {1, 10, 19, 28, 37}, and cn_stride set
// to 11 instead of the tester's default.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2116 
// n from 9 through 15, k in {1, 10, 19, 28, 37}, and every row count
// 1 <= m <= 4; each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2136 
// n in {16, 24} (multiples of nr=8) with m=4 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2153 
// n in {16, 24} with m=4, k in {1, 10, 19, 28, 37}, and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2171 
// n in {16, 24}, k in {1, 10, 19, 28, 37}, and every row count 1 <= m <= 4;
// each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2191 
// Full tile (m=4, n=8) with ks set to 3 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2207 
// ks set to 3, k in {1, 10, 19, 28, 37}, for every sub-tile 1 <= m <= 4,
// 1 <= n <= 8; each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2228 
// n from 9 through 15 with m=4, ks set to 3, and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2246 
// n in {16, 24} with m=4, ks set to 3, and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2264 
// cm_stride set to 11, k in {1, 10, 19, 28, 37}, for every sub-tile
// 1 <= m <= 4, 1 <= n <= 8; each case is configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2285 
// Full tile (m=4, n=8) with ks set to 3, a_offset set to 163, and
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2302 
// Full tile (m=4, n=8) with ks set to 3 and a_offset set to 163, repeated for
// each zero_index mz in 0..3 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2322 
// Full tile (m=4, n=8, k=8) with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2336 
// Full tile (m=4, n=8, k=8) with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2350 
// Full tile (m=4, n=8, k=8) with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2364 #endif  // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
2365 
2366 
2367 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
// Full tile (m=4, n=8) with k=8 and otherwise default tester settings.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2380 
// Full tile (m=4, n=8, k=8) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2394 
// k=8 for every sub-tile 1 <= m <= 4, 1 <= n <= 8; each case is configured
// with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2412 
// k=8 and n=8 with the row count m swept from 1 through 4; each case is
// configured with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2428 
// With m = 4 and k = 8 fixed, runs n = 1..8, each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(n).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2444 
// Full 4x8 problem (m = 4, n = 8) for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2459 
// For every k in 1..7, runs every (n, m) pair for n in 1..8 and m in 1..4,
// each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2479 
// Full 4x8 problem (m = 4, n = 8) for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2494 
// For every k in 9..15, runs every (n, m) pair for n in 1..8 and m in 1..4,
// each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2514 
// Full 4x8 problem (m = 4, n = 8) for k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2529 
// For k = 16, 24, ..., 80 (step 8), runs every (n, m) pair for n in 1..8 and
// m in 1..4, each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2549 
// With m = 4, runs n = 9..15 against k = 1, 10, 19, 28, 37 (step 9, k <= 40).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n).k(k)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2566 
// With m = 4 and cn_stride(11), runs n = 9..15 against
// k = 1, 10, 19, 28, 37 (step 9, k <= 40).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2584 
// Runs n = 9..15 against k = 1, 10, 19, 28, 37 (step 9, k <= 40) and
// m = 1..4, each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2604 
// With m = 4, runs n = 16 and 24 against k = 1, 10, 19, 28, 37
// (step 9, k <= 40).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n).k(k)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2621 
// With m = 4 and cn_stride(11), runs n = 16 and 24 against
// k = 1, 10, 19, 28, 37 (step 9, k <= 40).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2639 
// Runs n = 16 and 24 against k = 1, 10, 19, 28, 37 (step 9, k <= 40) and
// m = 1..4, each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2659 
// Full 4x8 problem with ks(3) for k = 1, 10, 19, 28, 37 (step 9, k <= 40).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2675 
// With ks(3), runs k = 1, 10, 19, 28, 37 (step 9, k <= 40) against every
// (n, m) pair for n in 1..8 and m in 1..4, each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2696 
// With m = 4 and ks(3), runs n = 9..15 against k = 1, 10, 19, 28, 37
// (step 9, k <= 40).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2714 
// With m = 4 and ks(3), runs n = 16 and 24 against k = 1, 10, 19, 28, 37
// (step 9, k <= 40).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2732 
// With cm_stride(11), runs k = 1, 10, 19, 28, 37 (step 9, k <= 40) against
// every (n, m) pair for n in 1..8 and m in 1..4, each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2753 
// Full 4x8 problem with ks(3) and a_offset(163) for
// k = 1, 10, 19, 28, 37 (step 9, k <= 40).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(k)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2770 
// Full 4x8 problem with ks(3) and a_offset(163); for each of
// k = 1, 10, 19, 28, 37 (step 9, k <= 40) runs zero_index(mz) for mz = 0..3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; ++mz) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2790 
// Single run with m = 4, n = 8, k = 8 and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2804 
// Single run with m = 4, n = 8, k = 8 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2818 
// Single run with m = 4, n = 8, k = 8 and cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(4).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2832 #endif  // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
2833 
2834 
2835 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run of the 1x8c8 aarch64_neon_mlal_prfm microkernel with m = 1,
// n = 8 and k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2848 
// Single run with m = 1, n = 8, k = 16 and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2862 
// With k fixed at 16, runs n = 1..8; the m loop covers only m = 1.
// Each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(m).n(n).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2880 
// With n = 8 and k = 16 fixed, the m loop covers only m = 1, run with
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(m).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2896 
// With m = 1 and k = 16 fixed, runs n = 1..8, each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(n).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2912 
// Full 1x8 problem (m = 1, n = 8) for every k in 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2927 
// For every k in 1..15, runs n = 1..8; the m loop covers only m = 1.
// Each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2947 
// Full 1x8 problem (m = 1, n = 8) for every k in 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2962 
// For every k in 17..31, runs n = 1..8; the m loop covers only m = 1.
// Each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2982 
// Full 1x8 problem (m = 1, n = 8) for k = 32, 48, ..., 160 (step 16).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2997 
// For k = 32, 48, ..., 160 (step 16), runs n = 1..8; the m loop covers only
// m = 1. Each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3017 
// With m = 1, runs n = 9..15 against k = 1, 18, 35, 52, 69
// (step 17, k <= 80).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n).k(k)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3034 
// With m = 1 and cn_stride(11), runs n = 9..15 against
// k = 1, 18, 35, 52, 69 (step 17, k <= 80).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3052 
// Runs n = 9..15 against k = 1, 18, 35, 52, 69 (step 17, k <= 80); the m
// loop covers only m = 1. Each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3072 
// With m = 1, runs n = 16 and 24 against k = 1, 18, 35, 52, 69
// (step 17, k <= 80).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n).k(k)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3089 
// With m = 1 and cn_stride(11), runs n = 16 and 24 against
// k = 1, 18, 35, 52, 69 (step 17, k <= 80).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3107 
// Runs n = 16 and 24 against k = 1, 18, 35, 52, 69 (step 17, k <= 80); the m
// loop covers only m = 1. Each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3127 
// Full 1x8 problem with ks(3) for k = 1, 18, 35, 52, 69 (step 17, k <= 80).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3143 
// With ks(3), runs k = 1, 18, 35, 52, 69 (step 17, k <= 80) against
// n = 1..8; the m loop covers only m = 1. Each configuration uses
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3164 
// With m = 1 and ks(3), runs n = 9..15 against k = 1, 18, 35, 52, 69
// (step 17, k <= 80).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3182 
// With m = 1 and ks(3), runs n = 16 and 24 against k = 1, 18, 35, 52, 69
// (step 17, k <= 80).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3200 
// With cm_stride(11), runs k = 1, 18, 35, 52, 69 (step 17, k <= 80) against
// n = 1..8; the m loop covers only m = 1. Each configuration uses
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3221 
// Full 1x8 problem with ks(3) and a_offset(83) for
// k = 1, 18, 35, 52, 69 (step 17, k <= 80).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3238 
// Full 1x8 problem with ks(3) and a_offset(83); for each of
// k = 1, 18, 35, 52, 69 (step 17, k <= 80) the mz loop covers only
// zero_index(0).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3258 
// Single run with m = 1, n = 8, k = 16 and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3272 
// qmax: single run with m=1, n=8, k=16 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3286 
// strided_cm: single run with m=1, n=8, k=16 and cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3300 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3301 
3302 
3303 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// k_eq_16: single run with m=2, n=8, k=16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3316 
// strided_cn: single run with m=2, n=8, k=16 and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3330 
// k_eq_16_subtile: k=16 for every n in 1..8 (outer) and m in 1..2 (inner), iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3348 
// k_eq_16_subtile_m: n=8, k=16 for every m in 1..2, iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3364 
// k_eq_16_subtile_n: m=2, k=16 for every n in 1..8, iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3380 
// k_lt_16: m=2, n=8 for every k in 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3395 
// k_lt_16_subtile: k in 1..15 (outer), n in 1..8, m in 1..2 (inner), iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3415 
// k_gt_16: m=2, n=8 for every k in 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3430 
// k_gt_16_subtile: k in 17..31 (outer), n in 1..8, m in 1..2 (inner), iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3450 
// k_div_16: m=2, n=8 for k = 32, 48, ..., 160 (step 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3465 
// k_div_16_subtile: k = 32, 48, ..., 160 (outer), n in 1..8, m in 1..2 (inner), iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3485 
// n_gt_8: m=2 for n in 9..15 (outer) and k = 1, 18, 35, 52, 69 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3502 
// n_gt_8_strided_cn: m=2, cn_stride(11) for n in 9..15 (outer) and k = 1, 18, 35, 52, 69 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3520 
// n_gt_8_subtile: n in 9..15 (outer), k = 1, 18, 35, 52, 69, m in 1..2 (inner), iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3540 
// n_div_8: m=2 for n = 16, 24 (outer) and k = 1, 18, 35, 52, 69 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3557 
// n_div_8_strided_cn: m=2, cn_stride(11) for n = 16, 24 (outer) and k = 1, 18, 35, 52, 69 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3575 
// n_div_8_subtile: n = 16, 24 (outer), k = 1, 18, 35, 52, 69, m in 1..2 (inner), iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3595 
// small_kernel: m=2, n=8, ks=3 for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3611 
// small_kernel_subtile: ks=3, iterations(1) for k = 1, 18, 35, 52, 69 (outer),
// n in 1..8, m in 1..2 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3632 
// n_gt_8_small_kernel: m=2, ks=3 for n in 9..15 (outer) and k = 1, 18, 35, 52, 69 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3650 
// n_div_8_small_kernel: m=2, ks=3 for n = 16, 24 (outer) and k = 1, 18, 35, 52, 69 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3668 
// strided_cm_subtile: cm_stride(11), iterations(1) for k = 1, 18, 35, 52, 69 (outer),
// n in 1..8, m in 1..2 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3689 
// a_offset: m=2, n=8, ks=3, a_offset=163; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3706 
// zero: ks=3, a_offset=163 plus zero_index(zi) for zi in 0..1;
// k takes the values 1, 18, 35, 52, 69 (outer loop).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3726 
// qmin: single run with m=2, n=8, k=16 and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3740 
// qmax: single run with m=2, n=8, k=16 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3754 
// strided_cm: single run with m=2, n=8, k=16 and cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3768 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3769 
3770 
3771 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// k_eq_16: single run with m=2, n=8, k=16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3784 
// strided_cn: single run with m=2, n=8, k=16 and cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3798 
// k_eq_16_subtile: k=16 for every n in 1..8 (outer) and m in 1..2 (inner), iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3816 
// k_eq_16_subtile_m: n=8, k=16 for every m in 1..2, iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3832 
// k_eq_16_subtile_n: m=2, k=16 for every n in 1..8, iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3848 
// k_lt_16: m=2, n=8 for every k in 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3863 
// k_lt_16_subtile: k in 1..15 (outer), n in 1..8, m in 1..2 (inner), iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3883 
// k_gt_16: m=2, n=8 for every k in 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3898 
// k_gt_16_subtile: k in 17..31 (outer), n in 1..8, m in 1..2 (inner), iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3918 
// k_div_16: m=2, n=8 for k = 32, 48, ..., 160 (step 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3933 
// k_div_16_subtile: k = 32, 48, ..., 160 (outer), n in 1..8, m in 1..2 (inner), iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3953 
// n_gt_8: m=2 for n in 9..15 (outer) and k = 1, 18, 35, 52, 69 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3970 
// n_gt_8_strided_cn: m=2, cn_stride(11) for n in 9..15 (outer) and k = 1, 18, 35, 52, 69 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3988 
// n_gt_8_subtile: n in 9..15 (outer), k = 1, 18, 35, 52, 69, m in 1..2 (inner), iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4008 
// n_div_8: m=2 for n = 16, 24 (outer) and k = 1, 18, 35, 52, 69 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4025 
// n_div_8_strided_cn: m=2, cn_stride(11) for n = 16, 24 (outer) and k = 1, 18, 35, 52, 69 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4043 
// n_div_8_subtile: n = 16, 24 (outer), k = 1, 18, 35, 52, 69, m in 1..2 (inner), iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4063 
// small_kernel: m=2, n=8, ks=3 for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4079 
// small_kernel_subtile: ks=3, iterations(1) for k = 1, 18, 35, 52, 69 (outer),
// n in 1..8, m in 1..2 (inner).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4100 
// n_gt_8_small_kernel: ks(3) with n in [9, 15] and m=2,
// for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4118 
// n_div_8_small_kernel: ks(3) with n = 16 and 24 and m=2,
// for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4136 
// strided_cm_subtile: cm_stride(11) with every (m, n) in [1,2] x [1,8],
// for k = 1, 18, 35, 52, 69; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4157 
// a_offset: ks(3) combined with a_offset(163) at m=2, n=8,
// for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4174 
// zero: ks(3) and a_offset(163) with zero_index set to 0 and then 1,
// at m=2, n=8, for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4194 
// qmin: single run at m=2, n=8, k=16 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4208 
// qmax: single run at m=2, n=8, k=16 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4222 
// strided_cm: single run at m=2, n=8, k=16 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4236 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4237 
4238 
4239 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// k_eq_16: single run of the 2x8c16 microkernel at m=2, n=8, k=16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4252 
// strided_cn: single run at m=2, n=8, k=16 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4266 
// k_eq_16_subtile: k=16 with every (n, m) in [1,8] x [1,2];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4284 
// k_eq_16_subtile_m: k=16, n=8, with m = 1 and 2; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4300 
// k_eq_16_subtile_n: k=16, m=2, with n in [1, 8]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4316 
// k_lt_16: m=2, n=8, with every k in [1, 15].
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4331 
// k_lt_16_subtile: every k in [1, 15] crossed with n in [1, 8] and m in [1, 2];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4351 
// k_gt_16: m=2, n=8, with every k in [17, 31].
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4366 
// k_gt_16_subtile: every k in [17, 31] crossed with n in [1, 8] and m in [1, 2];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4386 
// k_div_16: m=2, n=8, with k = 32, 48, ..., 160 (step 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4401 
// k_div_16_subtile: k = 32, 48, ..., 160 crossed with n in [1, 8] and m in [1, 2];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4421 
// n_gt_8: m=2 with n in [9, 15], for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4438 
// n_gt_8_strided_cn: cn_stride(11), m=2 with n in [9, 15],
// for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4456 
// n_gt_8_subtile: n in [9, 15] crossed with k = 1, 18, 35, 52, 69 and m in [1, 2];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4476 
// n_div_8: m=2 with n = 16 and 24, for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4493 
// n_div_8_strided_cn: cn_stride(11), m=2 with n = 16 and 24,
// for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4511 
// n_div_8_subtile: n = 16 and 24 crossed with k = 1, 18, 35, 52, 69 and m in [1, 2];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4531 
// small_kernel: ks(3) at m=2, n=8, for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4547 
// small_kernel_subtile: ks(3) with k = 1, 18, 35, 52, 69 crossed with
// n in [1, 8] and m in [1, 2]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4568 
// n_gt_8_small_kernel: ks(3), m=2 with n in [9, 15],
// for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4586 
// n_div_8_small_kernel: ks(3), m=2 with n = 16 and 24,
// for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4604 
// strided_cm_subtile: cm_stride(11) with k = 1, 18, 35, 52, 69 crossed with
// n in [1, 8] and m in [1, 2]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4625 
// a_offset: ks(3) combined with a_offset(163) at m=2, n=8,
// for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4642 
// zero: ks(3) and a_offset(163) with zero_index set to 0 and then 1,
// at m=2, n=8, for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4662 
// qmin: single run at m=2, n=8, k=16 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4676 
// qmax: single run at m=2, n=8, k=16 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4690 
// strided_cm: single run at m=2, n=8, k=16 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4704 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4705 
4706 
4707 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_8: single run of the 1x8 NEONv8 microkernel at m=1, n=8, k=8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4720 
// strided_cn: single run at m=1, n=8, k=8 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4734 
// k_eq_8_subtile: k=8 with n in [1, 8]; the m loop covers only m=1.
// One iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4752 
// k_eq_8_subtile_m: k=8, n=8; the m loop covers only m=1. One iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4768 
// k_eq_8_subtile_n: k=8, m=1, with n in [1, 8]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4784 
// k_lt_8: m=1, n=8, with every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4799 
// k_lt_8_subtile: every k in [1, 7] crossed with n in [1, 8]; the m loop
// covers only m=1. One iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4819 
// k_gt_8: m=1, n=8, with every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4834 
// k_gt_8_subtile: every k in [9, 15] crossed with n in [1, 8]; the m loop
// covers only m=1. One iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4854 
// k_div_8: m=1, n=8, with k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4869 
// k_div_8_subtile: k = 16, 24, ..., 80 crossed with n in [1, 8]; the m loop
// covers only m=1. One iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4889 
// n_gt_8: m=1 with n in [9, 15], for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4906 
// n_gt_8_strided_cn: cn_stride(11), m=1 with n in [9, 15],
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4924 
// n_gt_8_subtile: n in [9, 15] crossed with k = 1, 10, 19, 28, 37; the m loop
// covers only m=1. One iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4944 
// n_div_8: m=1 with n = 16 and 24, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4961 
// Sweeps n over 16 and 24 and k over 1, 10, 19, 28, 37 with m fixed at 1 and
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4979 
// Sweeps n over 16 and 24, k over 1, 10, 19, 28, 37 and m over the single
// value 1, running one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4999 
// Sweeps k over 1, 10, 19, 28, 37 on a full 1x8 tile with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5015 
// Sweeps k over 1, 10, 19, 28, 37, n over 1..8 and m over the single value 1
// with ks set to 3, running one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5036 
// Sweeps n over 9..15 and k over 1, 10, 19, 28, 37 with m fixed at 1 and ks
// set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5054 
// Sweeps n over 16 and 24 and k over 1, 10, 19, 28, 37 with m fixed at 1 and
// ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5072 
// Sweeps k over 1, 10, 19, 28, 37, n over 1..8 and m over the single value 1
// with cm_stride set to 11, running one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5093 
// Sweeps k over 1, 10, 19, 28, 37 on a full 1x8 tile with ks set to 3 and
// a_offset set to 43.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .a_offset(43)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5110 
// Sweeps k over 1, 10, 19, 28, 37 on a full 1x8 tile with ks set to 3 and
// a_offset set to 43, passing zero_index 0 (the only value in the loop range).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5130 
// Single full-tile run (m = 1, n = 8, k = 8) with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5144 
// Single full-tile run (m = 1, n = 8, k = 8) with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5158 
// Single full-tile run (m = 1, n = 8, k = 8) with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5172 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5173 
5174 
5175 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single full-tile run with m = 1, n = 8 and k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5188 
// Single full-tile run (m = 1, n = 8, k = 16) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5202 
// Sweeps n over 1..8 and m over the single value 1 at k = 16, running one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5220 
// Sweeps m over the single value 1 at n = 8 and k = 16, running one iteration
// per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5236 
// Sweeps n over 1..8 at m = 1 and k = 16, running one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(n_size).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5252 
// Sweeps k over 1..15 on a full 1x8 tile.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5267 
// Sweeps k over 1..15, n over 1..8 and m over the single value 1, running one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5287 
// Sweeps k over 17..31 on a full 1x8 tile.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5302 
// Sweeps k over 17..31, n over 1..8 and m over the single value 1, running one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5322 
// Sweeps k over 32, 48, ..., 160 (step 16) on a full 1x8 tile.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5337 
// Sweeps k over 32, 48, ..., 160 (step 16), n over 1..8 and m over the single
// value 1, running one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5357 
// Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 with m fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5374 
// Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 with m fixed at 1 and
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5392 
// Sweeps n over 9..15, k over 1, 18, 35, 52, 69 and m over the single value 1,
// running one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5412 
// Sweeps n over 16 and 24 and k over 1, 18, 35, 52, 69 with m fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5429 
// Sweeps n over 16 and 24 and k over 1, 18, 35, 52, 69 with m fixed at 1 and
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5447 
// Sweeps n over 16 and 24, k over 1, 18, 35, 52, 69 and m over the single
// value 1, running one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5467 
// Sweeps k over 1, 18, 35, 52, 69 on a full 1x8 tile with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5483 
// Sweeps k over 1, 18, 35, 52, 69, n over 1..8 and m over the single value 1
// with ks set to 3, running one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5504 
// Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 with m fixed at 1 and ks
// set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5522 
// Sweeps n over 16 and 24 and k over 1, 18, 35, 52, 69 with m fixed at 1 and
// ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5540 
// Sweeps k over 1, 18, 35, 52, 69, n over 1..8 and m over the single value 1
// with cm_stride set to 11, running one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5561 
// Sweeps k over 1, 18, 35, 52, 69 on a full 1x8 tile with ks set to 3 and
// a_offset set to 83.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5578 
// Sweeps k over 1, 18, 35, 52, 69 on a full 1x8 tile with ks set to 3 and
// a_offset set to 83, passing zero_index 0 (the only value in the loop range).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5598 
// Single full-tile run (m = 1, n = 8, k = 16) with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5612 
// Single full-tile run (m = 1, n = 8, k = 16) with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5626 
// Single full-tile run (m = 1, n = 8, k = 16) with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5640 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
5641 
5642 
5643 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single full-tile run with m = 1, n = 8 and k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5656 
// Single full-tile run (m = 1, n = 8, k = 16) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5670 
// Sweeps n over 1..8 and m over the single value 1 at k = 16, running one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5688 
// Sweeps m over the single value 1 at n = 8 and k = 16, running one iteration
// per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5704 
// Sweeps n over 1..8 at m = 1 and k = 16, running one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(n_size).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5720 
// Sweeps k over 1..15 on a full 1x8 tile.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5735 
// Sweeps k over 1..15, n over 1..8 and m over the single value 1, running one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5755 
// Sweeps k over 17..31 on a full 1x8 tile.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5770 
// Sweeps k over 17..31, n over 1..8 and m over the single value 1, running one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5790 
// Sweeps k over 32, 48, ..., 160 (step 16) on a full 1x8 tile.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5805 
// Sweeps k over 32, 48, ..., 160 (step 16), n over 1..8 and m over the single
// value 1, running one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5825 
// 1x8c2 neon_mlal_ld4r: n in [9, 15] crossed with k = 1, 18, 35, 52, 69
// (step 17, up to 80) and m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5842 
// 1x8c2 neon_mlal_ld4r: n in [9, 15] crossed with k = 1, 18, 35, 52, 69,
// m = 1, and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5860 
// 1x8c2 neon_mlal_ld4r: n in [9, 15] crossed with k = 1, 18, 35, 52, 69 and
// m in [1, 1]; each case runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5880 
// 1x8c2 neon_mlal_ld4r: n = 16 and 24 crossed with k = 1, 18, 35, 52, 69
// and m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5897 
// 1x8c2 neon_mlal_ld4r: n = 16 and 24 crossed with k = 1, 18, 35, 52, 69,
// m = 1, and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5915 
// 1x8c2 neon_mlal_ld4r: n = 16 and 24 crossed with k = 1, 18, 35, 52, 69 and
// m in [1, 1]; each case runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5935 
// 1x8c2 neon_mlal_ld4r: k = 1, 18, 35, 52, 69 with m = 1, n = 8 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5951 
// 1x8c2 neon_mlal_ld4r: k = 1, 18, 35, 52, 69 crossed with n in [1, 8] and
// m in [1, 1], with ks(3); each case runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5972 
// 1x8c2 neon_mlal_ld4r: n in [9, 15] crossed with k = 1, 18, 35, 52, 69,
// m = 1 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5990 
// 1x8c2 neon_mlal_ld4r: n = 16 and 24 crossed with k = 1, 18, 35, 52, 69,
// m = 1 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6008 
// 1x8c2 neon_mlal_ld4r: k = 1, 18, 35, 52, 69 crossed with n in [1, 8] and
// m in [1, 1], with cm_stride set to 11; each case runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6029 
// 1x8c2 neon_mlal_ld4r: k = 1, 18, 35, 52, 69 with m = 1, n = 8, ks(3) and
// a_offset(83).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6046 
// 1x8c2 neon_mlal_ld4r: k = 1, 18, 35, 52, 69 with m = 1, n = 8, ks(3),
// a_offset(83), and zero_index taking each value in [0, 1) (i.e. only 0).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_val)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6066 
// 1x8c2 neon_mlal_ld4r: single case m = 1, n = 8, k = 16 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6080 
// 1x8c2 neon_mlal_ld4r: single case m = 1, n = 8, k = 16 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6094 
// 1x8c2 neon_mlal_ld4r: single case m = 1, n = 8, k = 16 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6108 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6109 
6110 
6111 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 1x8c4 neon_mlal_ld1r: single case m = 1, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6124 
// 1x8c4 neon_mlal_ld1r: single case m = 1, n = 8, k = 16 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6138 
// 1x8c4 neon_mlal_ld1r: k = 16 with n in [1, 8] crossed with m in [1, 1];
// each case runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_val).n(n_val).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6156 
// 1x8c4 neon_mlal_ld1r: k = 16, n = 8, with m in [1, 1]; each case runs with
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(m_val).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6172 
// 1x8c4 neon_mlal_ld1r: k = 16, m = 1, with n in [1, 8]; each case runs with
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(n_val).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6188 
// 1x8c4 neon_mlal_ld1r: every k in [1, 15] with m = 1, n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6203 
// 1x8c4 neon_mlal_ld1r: every k in [1, 15] crossed with n in [1, 8] and
// m in [1, 1]; each case runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6223 
// 1x8c4 neon_mlal_ld1r: every k in [17, 31] with m = 1, n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6238 
// 1x8c4 neon_mlal_ld1r: every k in [17, 31] crossed with n in [1, 8] and
// m in [1, 1]; each case runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6258 
// 1x8c4 neon_mlal_ld1r: k = 32, 48, ..., 160 (step 16) with m = 1, n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6273 
// 1x8c4 neon_mlal_ld1r: k = 32, 48, ..., 160 crossed with n in [1, 8] and
// m in [1, 1]; each case runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6293 
// 1x8c4 neon_mlal_ld1r: n in [9, 15] crossed with k = 1, 18, 35, 52, 69
// (step 17, up to 80) and m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6310 
// 1x8c4 neon_mlal_ld1r: n in [9, 15] crossed with k = 1, 18, 35, 52, 69,
// m = 1, and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6328 
// 1x8c4 neon_mlal_ld1r: n in [9, 15] crossed with k = 1, 18, 35, 52, 69 and
// m in [1, 1]; each case runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6348 
// 1x8c4 neon_mlal_ld1r: n = 16 and 24 crossed with k = 1, 18, 35, 52, 69
// and m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6365 
// 1x8c4 neon_mlal_ld1r: n = 16 and 24 crossed with k = 1, 18, 35, 52, 69,
// m = 1, and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6383 
// 1x8c4 neon_mlal_ld1r: n = 16 and 24 crossed with k = 1, 18, 35, 52, 69 and
// m in [1, 1]; each case runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6403 
// 1x8c4 neon_mlal_ld1r: k = 1, 18, 35, 52, 69 with m = 1, n = 8 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6419 
// 1x8c4 neon_mlal_ld1r: k = 1, 18, 35, 52, 69 crossed with n in [1, 8] and
// m in [1, 1], with ks(3); each case runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6440 
// 1x8c4 neon_mlal_ld1r: n in [9, 15] crossed with k = 1, 18, 35, 52, 69,
// m = 1 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6458 
// 1x8c4 neon_mlal_ld1r: n = 16 and 24 crossed with k = 1, 18, 35, 52, 69,
// m = 1 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6476 
// 1x8c4 neon_mlal_ld1r: k = 1, 18, 35, 52, 69 crossed with n in [1, 8] and
// m in [1, 1], with cm_stride set to 11; each case runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6497 
// 1x8c4 neon_mlal_ld1r: k = 1, 18, 35, 52, 69 with m = 1, n = 8, ks(3) and
// a_offset(83).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6514 
// 1x8c4 neon_mlal_ld1r: k = 1, 18, 35, 52, 69 with m = 1, n = 8, ks(3),
// a_offset(83), and zero_index taking each value in [0, 1) (i.e. only 0).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6534 
// 1x8c4 neon_mlal_ld1r: single case m = 1, n = 8, k = 16 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6548 
// 1x8c4 neon_mlal_ld1r: single case m = 1, n = 8, k = 16 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6562 
// 1x8c4 neon_mlal_ld1r: single case m = 1, n = 8, k = 16 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6576 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6577 
6578 
6579 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 1x8c4s2 neonv8_mlal: single case m = 1, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(2)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6592 
// 1x8c4s2 neonv8_mlal: single case m = 1, n = 8, k = 16 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(2)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6606 
// 1x8c4s2 neonv8_mlal: k = 16 with n in [1, 8] crossed with m in [1, 1];
// each case runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(m_val).n(n_val).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6624 
// 1x8c4s2 neonv8_mlal: k = 16, n = 8, with m in [1, 1]; each case runs with
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(m_val).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6640 
// 1x8c4s2 neonv8_mlal: k = 16, m = 1, with n in [1, 8]; each case runs with
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(n_val).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6656 
// 1x8c4s2 neonv8_mlal: every k in [1, 15] with m = 1, n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6671 
// 1x8c4s2 neonv8_mlal: every k in [1, 15] crossed with n in [1, 8] and
// m in [1, 1]; each case runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6691 
// k_gt_16: m=1, n=8, with k swept over every value in [17, 31].
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6706 
// k_gt_16_subtile: k in [17, 31], n in [1, 8], m in [1, 1];
// iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6726 
// k_div_16: m=1, n=8, with k stepping by 16 from 32 through 160.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6741 
// k_div_16_subtile: k stepping by 16 from 32 through 160, n in [1, 8],
// m in [1, 1]; iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6761 
// n_gt_8: m=1, n in [9, 15], k stepping by 17 from 1 while k <= 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6778 
// n_gt_8_strided_cn: same sweep as n in [9, 15] and k in {1, 18, ..., <= 80}
// at m=1, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6796 
// n_gt_8_subtile: n in [9, 15], k stepping by 17 from 1 while k <= 80,
// m in [1, 1]; iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6816 
// n_div_8: m=1, n in {16, 24}, k stepping by 17 from 1 while k <= 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6833 
// n_div_8_strided_cn: m=1, n in {16, 24}, k stepping by 17 from 1 while
// k <= 80, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6851 
// n_div_8_subtile: n in {16, 24}, k stepping by 17 from 1 while k <= 80,
// m in [1, 1]; iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6871 
// small_kernel: m=1, n=8, ks set to 3, k stepping by 17 from 1 while k <= 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6887 
// small_kernel_subtile: ks set to 3, k stepping by 17 from 1 while k <= 80,
// n in [1, 8], m in [1, 1]; iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6908 
// n_gt_8_small_kernel: m=1, ks set to 3, n in [9, 15], k stepping by 17
// from 1 while k <= 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6926 
// n_div_8_small_kernel: m=1, ks set to 3, n in {16, 24}, k stepping by 17
// from 1 while k <= 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6944 
// strided_cm_subtile: cm_stride set to 11, k stepping by 17 from 1 while
// k <= 80, n in [1, 8], m in [1, 1]; iterations is set to 1 per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6965 
// a_offset: m=1, n=8, ks set to 3 and a_offset set to 83, k stepping by 17
// from 1 while k <= 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6982 
// zero: m=1, n=8, ks set to 3, a_offset set to 83; for each k (stepping by 17
// from 1 while k <= 80) zero_index is swept over [0, 0].
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7002 
// qmin: single run at m=1, n=8, k=16 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(2)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7016 
// qmax: single run at m=1, n=8, k=16 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(2)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7030 
// strided_cm: single run at m=1, n=8, k=16 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(2)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7044 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7045 
7046 
7047 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_16: single run at m=1, n=8, k=16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7060 
// strided_cn: single run at m=1, n=8, k=16 with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7074 
// k_eq_16_subtile: k fixed at 16, n in [1, 8], m in [1, 1];
// iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7092 
// k_eq_16_subtile_m: k fixed at 16 and n at 8, with m swept over [1, 1];
// iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7108 
// k_eq_16_subtile_n: k fixed at 16 and m at 1, with n swept over [1, 8];
// iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(n_size).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7124 
// k_lt_16: m=1, n=8, with k swept over every value in [1, 15].
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7139 
// k_lt_16_subtile: k in [1, 15], n in [1, 8], m in [1, 1];
// iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7159 
// k_gt_16: m=1, n=8, with k swept over every value in [17, 31].
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7174 
// k_gt_16_subtile: k in [17, 31], n in [1, 8], m in [1, 1];
// iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7194 
// k_div_16: m=1, n=8, with k stepping by 16 from 32 through 160.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7209 
// k_div_16_subtile: k stepping by 16 from 32 through 160, n in [1, 8],
// m in [1, 1]; iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7229 
// n_gt_8: m=1, n in [9, 15], k stepping by 17 from 1 while k <= 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7246 
// n_gt_8_strided_cn: m=1, n in [9, 15], k stepping by 17 from 1 while
// k <= 80, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7264 
// n_gt_8_subtile: n in [9, 15], k stepping by 17 from 1 while k <= 80,
// m in [1, 1]; iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7284 
// n_div_8: m=1, n in {16, 24}, k stepping by 17 from 1 while k <= 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7301 
// n_div_8_strided_cn: m=1, n in {16, 24}, k stepping by 17 from 1 while
// k <= 80, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7319 
// n_div_8_subtile: n in {16, 24}, k stepping by 17 from 1 while k <= 80,
// m in [1, 1]; iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7339 
// small_kernel: m=1, n=8, ks set to 3, k stepping by 17 from 1 while k <= 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7355 
// small_kernel_subtile: ks set to 3, k stepping by 17 from 1 while k <= 80,
// n in [1, 8], m in [1, 1]; iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7376 
// n_gt_8_small_kernel: m=1, ks set to 3, n in [9, 15], k stepping by 17
// from 1 while k <= 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7394 
// n_div_8_small_kernel: m=1, ks set to 3, n in {16, 24}, k stepping by 17
// from 1 while k <= 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7412 
// strided_cm_subtile: cm_stride set to 11, k stepping by 17 from 1 while
// k <= 80, n in [1, 8], m in [1, 1]; iterations is set to 1 per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7433 
// a_offset: m=1, n=8, ks set to 3 and a_offset set to 83, k stepping by 17
// from 1 while k <= 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7450 
// zero: m=1, n=8, ks set to 3, a_offset set to 83; for each k (stepping by 17
// from 1 while k <= 80) zero_index is swept over [0, 0].
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7470 
// qmin: single run at m=1, n=8, k=16 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7484 
// qmax: single run at m=1, n=8, k=16 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7498 
// strided_cm: single run at m=1, n=8, k=16 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7512 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7513 
7514 
7515 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_8: single run at m=1, n=16, k=8.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
7528 
// strided_cn: single run at m=1, n=16, k=8 with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(1).sr(1)
    .m(1).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
7542 
// k_eq_8_subtile: k fixed at 8, n in [1, 16], m in [1, 1];
// iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7560 
// Sweeps m over [1, 1] (a single value because mr = 1) with n = 16 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7576 
// Sweeps n over [1, 16] with m = 1 and k = 8, one tester iteration per n.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7592 
// Full tile (m = 1, n = 16) for every k in [1, 7], i.e. all k below 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7607 
// Every k in [1, 7] crossed with every sub-tile: n in [1, 16], m in [1, 1]
// (the m loop runs once because mr = 1). One tester iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7627 
// Full tile (m = 1, n = 16) for every k in [9, 15], i.e. k strictly between
// 8 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7642 
// Every k in [9, 15] crossed with every sub-tile: n in [1, 16], m in [1, 1]
// (the m loop runs once because mr = 1). One tester iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7662 
// Full tile (m = 1, n = 16) for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7677 
// k = 16, 24, ..., 80 crossed with every sub-tile: n in [1, 16], m in [1, 1]
// (the m loop runs once because mr = 1). One tester iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7697 
// n in [17, 31] (more than one nr = 16 tile, less than two) with m = 1 and
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
7714 
// n in [17, 31] with m = 1, k = 1, 10, 19, 28, 37 and cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
7732 
// n in [17, 31], k = 1, 10, 19, 28, 37 and m in [1, 1] (the m loop runs once
// because mr = 1). One tester iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7752 
// n = 32 and 48 (multiples of nr = 16) with m = 1 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
7769 
// n = 32 and 48 with m = 1, k = 1, 10, 19, 28, 37 and cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
7787 
// n = 32 and 48, k = 1, 10, 19, 28, 37 and m in [1, 1] (the m loop runs once
// because mr = 1). One tester iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7807 
// Full tile (m = 1, n = 16) with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7823 
// ks set to 3 with k = 1, 10, 19, 28, 37 crossed with every sub-tile:
// n in [1, 16], m in [1, 1] (the m loop runs once because mr = 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7844 
// n in [17, 31] with m = 1, ks set to 3 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
7862 
// n = 32 and 48 with m = 1, ks set to 3 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
7880 
// cm_stride set to 19 with k = 1, 10, 19, 28, 37 crossed with every sub-tile:
// n in [1, 16], m in [1, 1] (the m loop runs once because mr = 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
7901 
// Full tile (m = 1, n = 16) with ks set to 3 and a_offset set to 43, for
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
7918 
// Same setup as the a_offset case (ks = 3, a_offset = 43) plus zero_index(mz)
// for each mz in [0, 1) -- a single value, mz = 0, since the bound is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
7938 
// Full tile (m = 1, n = 16, k = 8) with the tester's qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
7952 
// Full tile (m = 1, n = 16, k = 8) with the tester's qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
7966 
// Full tile (m = 1, n = 16, k = 8) with cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
7980 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7981 
7982 
7983 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the 1x16 NEONv8 MLAL-lane PRFM QC8 IGEMM microkernel on a full tile
// (m = mr = 1, n = nr = 16) with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
7996 
// Full tile (m = 1, n = 16, k = 8) with cn_stride set to 19, i.e. a value
// larger than nr = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8010 
// Sweeps every output sub-tile size with k = 8: n in [1, 16] and m in [1, 1]
// (the m loop runs once because mr = 1). One tester iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8028 
// Sweeps m over [1, 1] (a single value because mr = 1) with n = 16 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8044 
// Sweeps n over [1, 16] with m = 1 and k = 8, one tester iteration per n.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8060 
// Full tile (m = 1, n = 16) for every k in [1, 7], i.e. all k below 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8075 
// Every k in [1, 7] crossed with every sub-tile: n in [1, 16], m in [1, 1]
// (the m loop runs once because mr = 1). One tester iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8095 
// Full tile (m = 1, n = 16) for every k in [9, 15], i.e. k strictly between
// 8 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8110 
// Every k in [9, 15] crossed with every sub-tile: n in [1, 16], m in [1, 1]
// (the m loop runs once because mr = 1). One tester iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8130 
// Full tile (m = 1, n = 16) for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8145 
// k = 16, 24, ..., 80 crossed with every sub-tile: n in [1, 16], m in [1, 1]
// (the m loop runs once because mr = 1). One tester iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8165 
// n in [17, 31] (more than one nr = 16 tile, less than two) with m = 1 and
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8182 
// n in [17, 31] with m = 1, k = 1, 10, 19, 28, 37 and cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8200 
// n in [17, 31], k = 1, 10, 19, 28, 37 and m in [1, 1] (the m loop runs once
// because mr = 1). One tester iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8220 
// n = 32 and 48 (multiples of nr = 16) with m = 1 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8237 
// n = 32 and 48 with m = 1, k = 1, 10, 19, 28, 37 and cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8255 
// n = 32 and 48, k = 1, 10, 19, 28, 37 and m in [1, 1] (the m loop runs once
// because mr = 1). One tester iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8275 
// Full tile (m = 1, n = 16) with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8291 
// ks set to 3 with k = 1, 10, 19, 28, 37 crossed with every sub-tile:
// n in [1, 16], m in [1, 1] (the m loop runs once because mr = 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8312 
// n in [17, 31] with m = 1, ks set to 3 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8330 
// n = 32 and 48 with m = 1, ks set to 3 and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8348 
// cm_stride set to 19 with k = 1, 10, 19, 28, 37 crossed with every sub-tile:
// n in [1, 16], m in [1, 1] (the m loop runs once because mr = 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8369 
// Full tile (m = 1, n = 16) with ks set to 3 and a_offset set to 43, for
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8386 
// Same setup as the a_offset case (ks = 3, a_offset = 43) plus zero_index(mz)
// for each mz in [0, 1) -- a single value, mz = 0, since the bound is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8406 
// Runs the kernel once at m = 1, n = 16, k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(16).kr(1).sr(1);
  tester.m(1).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8420 
// Runs the kernel once at m = 1, n = 16, k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(16).kr(1).sr(1);
  tester.m(1).n(16).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8434 
// Runs the kernel once at m = 1, n = 16, k = 8 with cm_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_1X16__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(16).kr(1).sr(1);
  tester.m(1).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8448 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8449 
8450 
8451 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with m = 2, n = 8 (equal to mr and nr) and k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
8464 
// Runs the kernel once at m = 2, n = 8, k = 16 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
8478 
// Sweeps n over 1..8 and m over 1..2 at k = 16, one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(rows).n(cols).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8496 
// Sweeps m over 1..2 at n = 8, k = 16, one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(rows).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8512 
// Sweeps n over 1..8 at m = 2, k = 16, one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(cols).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8528 
// Sweeps k over 1..15 at m = 2, n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8543 
// Sweeps k over 1..15, n over 1..8 and m over 1..2, one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8563 
// Sweeps k over 17..31 at m = 2, n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8578 
// Sweeps k over 17..31, n over 1..8 and m over 1..2, one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8598 
// Sweeps k over the multiples of 16 from 32 to 160 at m = 2, n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8613 
// Sweeps k over the multiples of 16 from 32 to 160, n over 1..8 and m over 1..2,
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8633 
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} at m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8650 
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} at m = 2 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8668 
// Sweeps n over 9..15, k over {1, 18, 35, 52, 69} and m over 1..2,
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8688 
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} at m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8705 
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} at m = 2 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8723 
// Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over 1..2,
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8743 
// Sweeps k over {1, 18, 35, 52, 69} at m = 2, n = 8 with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8759 
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over 1..2 with ks(3),
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8780 
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} at m = 2 with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8798 
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} at m = 2 with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8816 
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over 1..2 with cm_stride(11),
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(11).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8837 
// Sweeps k over {1, 18, 35, 52, 69} at m = 2, n = 8 with ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(depth);
    tester.ks(3).a_offset(163);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8854 
// Sweeps k over {1, 18, 35, 52, 69} and zero_index over 0..1 at m = 2, n = 8,
// with ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(8).k(depth);
      tester.ks(3).a_offset(163).zero_index(zero_row);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8874 
// Runs the kernel once at m = 2, n = 8, k = 16 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
8888 
// Runs the kernel once at m = 2, n = 8, k = 16 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
8902 
// Runs the kernel once at m = 2, n = 8, k = 16 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
8916 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
8917 
8918 
8919 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel once with m = 2, n = 8 (equal to mr and nr) and k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
8932 
// Runs the kernel once at m = 2, n = 8, k = 16 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
8946 
// Sweeps n over 1..8 and m over 1..2 at k = 16, one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(rows).n(cols).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8964 
// Sweeps m over 1..2 at n = 8, k = 16, one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(rows).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8980 
// Sweeps n over 1..8 at m = 2, k = 16, one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(cols).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
8996 
// Sweeps k over 1..15 at m = 2, n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9011 
// Sweeps k over 1..15, n over 1..8 and m over 1..2, one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9031 
// Sweeps k over 17..31 at m = 2, n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9046 
// Sweeps k over 17..31, n over 1..8 and m over 1..2, one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9066 
// Sweeps k over the multiples of 16 from 32 to 160 at m = 2, n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9081 
// Sweeps k over the multiples of 16 from 32 to 160, n over 1..8 and m over 1..2,
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9101 
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} at m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9118 
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} at m = 2 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9136 
// Sweeps n over 9..15, k over {1, 18, 35, 52, 69} and m over 1..2,
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9156 
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} at m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9173 
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} at m = 2 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9191 
// Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over 1..2,
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9211 
// Sweeps k over {1, 18, 35, 52, 69} at m = 2, n = 8 with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9227 
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over 1..2 with ks(3),
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9248 
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} at m = 2 with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9266 
// n_div_8_small_kernel: ks(3) with n in {16, 24} (multiples of nr = 8), m = 2,
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n).k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
9284 
// strided_cm_subtile: cm_stride(11) with every sub-tile size m in [1, 2],
// n in [1, 8], for k in {1, 18, 35, 52, 69}; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9305 
// a_offset: full 2x8 tile with ks(3) and a_offset(163), for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9322 
// zero: full 2x8 tile with ks(3) and a_offset(163), setting zero_index(mz) for
// each mz in {0, 1}, for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
9342 
// qmin: full 2x8 tile at k = 16 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
9356 
// qmax: full 2x8 tile at k = 16 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
9370 
// strided_cm: full 2x8 tile at k = 16 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld4r, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
9384 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
9385 
9386 
9387 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_16: full 2x8 tile at k = 16 with no extra tester settings.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
9400 
// strided_cn: full 2x8 tile at k = 16 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
9414 
// k_eq_16_subtile: k = 16 with every sub-tile size n in [1, 8], m in [1, 2];
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(m).n(n).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9432 
// k_eq_16_subtile_m: k = 16, n = 8, varying only m over [1, 2]; iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(m).n(8).k(16)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9448 
// k_eq_16_subtile_n: k = 16, m = 2, varying only n over [1, 8]; iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(n).k(16)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9464 
// k_lt_16: full 2x8 tile for every k in [1, 15].
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9479 
// k_lt_16_subtile: every k in [1, 15] combined with every sub-tile size
// n in [1, 8], m in [1, 2]; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9499 
// k_gt_16: full 2x8 tile for every k in [17, 31].
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9514 
// k_gt_16_subtile: every k in [17, 31] combined with every sub-tile size
// n in [1, 8], m in [1, 2]; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9534 
// k_div_16: full 2x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9549 
// k_div_16_subtile: k = 32, 48, ..., 160 combined with every sub-tile size
// n in [1, 8], m in [1, 2]; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9569 
// n_gt_8: n in [9, 15] (above nr = 8, not a multiple of it), m = 2,
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9586 
// n_gt_8_strided_cn: n in [9, 15] with cn_stride(11), m = 2,
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9604 
// n_gt_8_subtile: n in [9, 15], k in {1, 18, 35, 52, 69}, and m in [1, 2];
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9624 
// n_div_8: n in {16, 24} (multiples of nr = 8), m = 2,
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9641 
// n_div_8_strided_cn: n in {16, 24} with cn_stride(11), m = 2,
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9659 
// n_div_8_subtile: n in {16, 24}, k in {1, 18, 35, 52, 69}, and m in [1, 2];
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9679 
// small_kernel: full 2x8 tile with ks(3); k takes the values 1, 18, 35, 52, 69
// (k <= 80 in steps of 17).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9695 
// small_kernel_subtile: ks(3) with every sub-tile size m in [1, 2], n in [1, 8],
// for k in {1, 18, 35, 52, 69}; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9716 
// n_gt_8_small_kernel: ks(3) with n in [9, 15], m = 2,
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n).k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9734 
// n_div_8_small_kernel: ks(3) with n in {16, 24}, m = 2,
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n).k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9752 
// strided_cm_subtile: cm_stride(11) with every sub-tile size m in [1, 2],
// n in [1, 8], for k in {1, 18, 35, 52, 69}; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9773 
// a_offset: full 2x8 tile with ks(3) and a_offset(163), for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9790 
// zero: full 2x8 tile with ks(3) and a_offset(163), setting zero_index(mz) for
// each mz in {0, 1}, for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9810 
// qmin: full 2x8 tile at k = 16 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
9824 
// qmax: full 2x8 tile at k = 16 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
9838 
// strided_cm: full 2x8 tile at k = 16 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
9852 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
9853 
9854 
9855 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_16: full 2x8 tile at k = 16 with no extra tester settings.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
9868 
// strided_cn: full 2x8 tile at k = 16 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
9882 
// k_eq_16_subtile: k = 16 with every sub-tile size n in [1, 8], m in [1, 2];
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(m).n(n).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
9900 
// k_eq_16_subtile_m: k = 16, n = 8, varying only m over [1, 2]; iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(m).n(8).k(16)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9916 
// k_eq_16_subtile_n: k = 16, m = 2, varying only n over [1, 8]; iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(n).k(16)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9932 
// k_lt_16: full 2x8 tile for every k in [1, 15].
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9947 
// k_lt_16_subtile: every k in [1, 15] combined with every sub-tile size
// n in [1, 8], m in [1, 2]; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9967 
// k_gt_16: full 2x8 tile for every k in [17, 31].
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9982 
// k_gt_16_subtile: every k in [17, 31] combined with every sub-tile size
// n in [1, 8], m in [1, 2]; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10002 
// k_div_16: full 2x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10017 
// k_div_16_subtile: k = 32, 48, ..., 160 combined with every sub-tile size
// n in [1, 8], m in [1, 2]; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10037 
// n_gt_8: n in [9, 15] (above nr = 8, not a multiple of it), m = 2,
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10054 
// n_gt_8_strided_cn: n in [9, 15] with cn_stride(11), m = 2,
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10072 
// n_gt_8_subtile: n in [9, 15], k in {1, 18, 35, 52, 69}, and m in [1, 2];
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10092 
// n_div_8: n in {16, 24} (multiples of nr = 8), m = 2,
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10109 
// n_div_8_strided_cn: n in {16, 24} with cn_stride(11), m = 2,
// and k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10127 
// 2x8c4 NEON MLAL-DUP kernel: n = 16 and 24, k = 1, 18, ..., 69, every m in 1..2,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(4).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10147 
// 2x8c4 NEON MLAL-DUP kernel: full 2x8 tile with ks = 3, k = 1, 18, ..., 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(4).sr(1);
    tester.m(2).n(8).k(k);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10163 
// 2x8c4 NEON MLAL-DUP kernel: ks = 3 over k = 1, 18, ..., 69, every n in 1..8 and
// m in 1..2, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(4).sr(1);
        tester.m(m).n(n).k(k);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10184 
// 2x8c4 NEON MLAL-DUP kernel: ks = 3 with n in 9..15, k = 1, 18, ..., 69, m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(1);
      tester.m(2).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10202 
// 2x8c4 NEON MLAL-DUP kernel: ks = 3 with n = 16 and 24, k = 1, 18, ..., 69, m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(1);
      tester.m(2).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10220 
// 2x8c4 NEON MLAL-DUP kernel: cm_stride set to 11 over k = 1, 18, ..., 69, every
// n in 1..8 and m in 1..2, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(4).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10241 
// 2x8c4 NEON MLAL-DUP kernel: full 2x8 tile with ks = 3 and a_offset = 163,
// k = 1, 18, ..., 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(4).sr(1);
    tester.m(2).n(8).k(k);
    tester.ks(3).a_offset(163);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10258 
// 2x8c4 NEON MLAL-DUP kernel: ks = 3 and a_offset = 163 with zero_index set to
// each of 0 and 1, k = 1, 18, ..., 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(4).sr(1);
      tester.m(2).n(8).k(k);
      tester.ks(3).a_offset(163).zero_index(mz);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10278 
// 2x8c4 NEON MLAL-DUP kernel: full 2x8 tile, k = 16, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(4).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10292 
// 2x8c4 NEON MLAL-DUP kernel: full 2x8 tile, k = 16, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(4).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10306 
// 2x8c4 NEON MLAL-DUP kernel: full 2x8 tile, k = 16, with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(4).sr(1);
  tester.m(2).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10320 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
10321 
10322 
10323 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 2x8c8 NEON MLAL kernel: full 2x8 tile with k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10336 
// 2x8c8 NEON MLAL kernel: full 2x8 tile, k = 16, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10350 
// 2x8c8 NEON MLAL kernel: k = 16 for every n in 1..8 and m in 1..2, one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(m).n(n).k(16);
      tester.iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10368 
// 2x8c8 NEON MLAL kernel: k = 16, n = 8, every m in 1..2, one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(m).n(8).k(16);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10384 
// 2x8c8 NEON MLAL kernel: k = 16, m = 2, every n in 1..8, one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(n).k(16);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10400 
// 2x8c8 NEON MLAL kernel: full 2x8 tile for every k in 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10415 
// 2x8c8 NEON MLAL kernel: every k in 1..15, n in 1..8 and m in 1..2, one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10435 
// 2x8c8 NEON MLAL kernel: full 2x8 tile for every k in 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10450 
// 2x8c8 NEON MLAL kernel: every k in 17..31, n in 1..8 and m in 1..2, one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10470 
// 2x8c8 NEON MLAL kernel: full 2x8 tile for k = 32, 48, ..., 160.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10485 
// 2x8c8 NEON MLAL kernel: k = 32, 48, ..., 160 for every n in 1..8 and m in 1..2,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10505 
// 2x8c8 NEON MLAL kernel: n in 9..15, k = 1, 18, ..., 69, with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10522 
// 2x8c8 NEON MLAL kernel: n in 9..15, k = 1, 18, ..., 69, with m = 2 and
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10540 
// 2x8c8 NEON MLAL kernel: n in 9..15, k = 1, 18, ..., 69, every m in 1..2, one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10560 
// 2x8c8 NEON MLAL kernel: n = 16 and 24, k = 1, 18, ..., 69, with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10577 
// 2x8c8 NEON MLAL kernel: n = 16 and 24, k = 1, 18, ..., 69, with m = 2 and
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10595 
// 2x8c8 NEON MLAL kernel: n = 16 and 24, k = 1, 18, ..., 69, every m in 1..2, one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10615 
// 2x8c8 NEON MLAL kernel: full 2x8 tile with ks = 3, k = 1, 18, ..., 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10631 
// 2x8c8 NEON MLAL kernel: ks = 3 over k = 1, 18, ..., 69, every n in 1..8 and
// m in 1..2, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10652 
// 2x8c8 NEON MLAL kernel: ks = 3 with n in 9..15, k = 1, 18, ..., 69, m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10670 
// 2x8c8 NEON MLAL kernel: ks = 3 with n = 16 and 24, k = 1, 18, ..., 69, m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10688 
// 2x8c8 NEON MLAL kernel: cm_stride set to 11 over k = 1, 18, ..., 69, every n in
// 1..8 and m in 1..2, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10709 
// 2x8c8 NEON MLAL kernel: full 2x8 tile with ks = 3 and a_offset = 163,
// k = 1, 18, ..., 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k);
    tester.ks(3).a_offset(163);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10726 
// 2x8c8 NEON MLAL kernel: ks = 3 and a_offset = 163 with zero_index set to each
// of 0 and 1, k = 1, 18, ..., 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(8).k(k);
      tester.ks(3).a_offset(163).zero_index(mz);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10746 
// 2x8c8 NEON MLAL kernel: full 2x8 tile, k = 16, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10760 
// 2x8c8 NEON MLAL kernel: full 2x8 tile, k = 16, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10774 
// 2x8c8 NEON MLAL kernel: full 2x8 tile, k = 16, with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10788 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
10789 
10790 
10791 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 2x16 NEON MLAL-LANE kernel: full 2x16 tile with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(16).kr(1).sr(1);
  tester.m(2).n(16).k(8);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10804 
// 2x16 NEON MLAL-LANE kernel: full 2x16 tile, k = 8, with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(16).kr(1).sr(1);
  tester.m(2).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10818 
// 2x16 NEON MLAL-LANE kernel: k = 8 for every n in 1..16 and m in 1..2, one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(1).sr(1);
      tester.m(m).n(n).k(8);
      tester.iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10836 
// 2x16 NEON MLAL-LANE kernel: k = 8, n = 16, every m in 1..2, one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(1).sr(1);
    tester.m(m).n(16).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10852 
// 2x16 NEON MLAL-LANE kernel: k = 8, m = 2, every n in 1..16, one iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(1).sr(1);
    tester.m(2).n(n).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10868 
// 2x16 NEON MLAL-LANE kernel: full 2x16 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(1).sr(1);
    tester.m(2).n(16).k(k);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10883 
// 2x16 NEON MLAL-LANE kernel: every k in 1..7, n in 1..16 and m in 1..2, one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10903 
// 2x16 NEON MLAL-LANE kernel: full 2x16 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(1).sr(1);
    tester.m(2).n(16).k(k);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10918 
// 2x16 NEON MLAL-LANE kernel: every k in 9..15, n in 1..16 and m in 1..2, one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10938 
// 2x16 NEON MLAL-LANE kernel: full 2x16 tile for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(16).kr(1).sr(1);
    tester.m(2).n(16).k(k);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10953 
// 2x16 NEON MLAL-LANE kernel: k = 16, 24, ..., 80 for every n in 1..16 and m in
// 1..2, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(16).kr(1).sr(1);
        tester.m(m).n(n).k(k);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10973 
// 2x16 NEON MLAL-LANE kernel: n in 17..31, k = 1, 10, ..., 37, with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(16).kr(1).sr(1);
      tester.m(2).n(n).k(k);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10990 
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE,n_gt_16_strided_cn)10991   TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
10992     TEST_REQUIRES_ARM_NEON;
10993     for (uint32_t n = 17; n < 32; n++) {
10994       for (size_t k = 1; k <= 40; k += 9) {
10995         GemmMicrokernelTester()
10996           .mr(2)
10997           .nr(16)
10998           .kr(1)
10999           .sr(1)
11000           .m(2)
11001           .n(n)
11002           .k(k)
11003           .cn_stride(19)
11004           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11005       }
11006     }
11007   }
11008 
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE,n_gt_16_subtile)11009   TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_gt_16_subtile) {
11010     TEST_REQUIRES_ARM_NEON;
11011     for (uint32_t n = 17; n < 32; n++) {
11012       for (size_t k = 1; k <= 40; k += 9) {
11013         for (uint32_t m = 1; m <= 2; m++) {
11014           GemmMicrokernelTester()
11015             .mr(2)
11016             .nr(16)
11017             .kr(1)
11018             .sr(1)
11019             .m(m)
11020             .n(n)
11021             .k(k)
11022             .iterations(1)
11023             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11024         }
11025       }
11026     }
11027   }
11028 
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE,n_div_16)11029   TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_div_16) {
11030     TEST_REQUIRES_ARM_NEON;
11031     for (uint32_t n = 32; n <= 48; n += 16) {
11032       for (size_t k = 1; k <= 40; k += 9) {
11033         GemmMicrokernelTester()
11034           .mr(2)
11035           .nr(16)
11036           .kr(1)
11037           .sr(1)
11038           .m(2)
11039           .n(n)
11040           .k(k)
11041           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11042       }
11043     }
11044   }
11045 
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE,n_div_16_strided_cn)11046   TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
11047     TEST_REQUIRES_ARM_NEON;
11048     for (uint32_t n = 32; n <= 48; n += 16) {
11049       for (size_t k = 1; k <= 40; k += 9) {
11050         GemmMicrokernelTester()
11051           .mr(2)
11052           .nr(16)
11053           .kr(1)
11054           .sr(1)
11055           .m(2)
11056           .n(n)
11057           .k(k)
11058           .cn_stride(19)
11059           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11060       }
11061     }
11062   }
11063 
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE,n_div_16_subtile)11064   TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_div_16_subtile) {
11065     TEST_REQUIRES_ARM_NEON;
11066     for (uint32_t n = 32; n <= 48; n += 16) {
11067       for (size_t k = 1; k <= 40; k += 9) {
11068         for (uint32_t m = 1; m <= 2; m++) {
11069           GemmMicrokernelTester()
11070             .mr(2)
11071             .nr(16)
11072             .kr(1)
11073             .sr(1)
11074             .m(m)
11075             .n(n)
11076             .k(k)
11077             .iterations(1)
11078             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11079         }
11080       }
11081     }
11082   }
11083 
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE,small_kernel)11084   TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, small_kernel) {
11085     TEST_REQUIRES_ARM_NEON;
11086     for (size_t k = 1; k <= 40; k += 9) {
11087       GemmMicrokernelTester()
11088         .mr(2)
11089         .nr(16)
11090         .kr(1)
11091         .sr(1)
11092         .m(2)
11093         .n(16)
11094         .k(k)
11095         .ks(3)
11096         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11097     }
11098   }
11099 
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE,small_kernel_subtile)11100   TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, small_kernel_subtile) {
11101     TEST_REQUIRES_ARM_NEON;
11102     for (size_t k = 1; k <= 40; k += 9) {
11103       for (uint32_t n = 1; n <= 16; n++) {
11104         for (uint32_t m = 1; m <= 2; m++) {
11105           GemmMicrokernelTester()
11106             .mr(2)
11107             .nr(16)
11108             .kr(1)
11109             .sr(1)
11110             .m(m)
11111             .n(n)
11112             .k(k)
11113             .ks(3)
11114             .iterations(1)
11115             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11116         }
11117       }
11118     }
11119   }
11120 
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE,n_gt_16_small_kernel)11121   TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
11122     TEST_REQUIRES_ARM_NEON;
11123     for (uint32_t n = 17; n < 32; n++) {
11124       for (size_t k = 1; k <= 40; k += 9) {
11125         GemmMicrokernelTester()
11126           .mr(2)
11127           .nr(16)
11128           .kr(1)
11129           .sr(1)
11130           .m(2)
11131           .n(n)
11132           .k(k)
11133           .ks(3)
11134           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11135       }
11136     }
11137   }
11138 
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE,n_div_16_small_kernel)11139   TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
11140     TEST_REQUIRES_ARM_NEON;
11141     for (uint32_t n = 32; n <= 48; n += 16) {
11142       for (size_t k = 1; k <= 40; k += 9) {
11143         GemmMicrokernelTester()
11144           .mr(2)
11145           .nr(16)
11146           .kr(1)
11147           .sr(1)
11148           .m(2)
11149           .n(n)
11150           .k(k)
11151           .ks(3)
11152           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11153       }
11154     }
11155   }
11156 
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE,strided_cm_subtile)11157   TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, strided_cm_subtile) {
11158     TEST_REQUIRES_ARM_NEON;
11159     for (size_t k = 1; k <= 40; k += 9) {
11160       for (uint32_t n = 1; n <= 16; n++) {
11161         for (uint32_t m = 1; m <= 2; m++) {
11162           GemmMicrokernelTester()
11163             .mr(2)
11164             .nr(16)
11165             .kr(1)
11166             .sr(1)
11167             .m(m)
11168             .n(n)
11169             .k(k)
11170             .cm_stride(19)
11171             .iterations(1)
11172             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11173         }
11174       }
11175     }
11176   }
11177 
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE,a_offset)11178   TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, a_offset) {
11179     TEST_REQUIRES_ARM_NEON;
11180     for (size_t k = 1; k <= 40; k += 9) {
11181       GemmMicrokernelTester()
11182         .mr(2)
11183         .nr(16)
11184         .kr(1)
11185         .sr(1)
11186         .m(2)
11187         .n(16)
11188         .k(k)
11189         .ks(3)
11190         .a_offset(83)
11191         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11192     }
11193   }
11194 
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE,zero)11195   TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, zero) {
11196     TEST_REQUIRES_ARM_NEON;
11197     for (size_t k = 1; k <= 40; k += 9) {
11198       for (uint32_t mz = 0; mz < 2; mz++) {
11199         GemmMicrokernelTester()
11200           .mr(2)
11201           .nr(16)
11202           .kr(1)
11203           .sr(1)
11204           .m(2)
11205           .n(16)
11206           .k(k)
11207           .ks(3)
11208           .a_offset(83)
11209           .zero_index(mz)
11210           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11211       }
11212     }
11213   }
11214 
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE,qmin)11215   TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, qmin) {
11216     TEST_REQUIRES_ARM_NEON;
11217     GemmMicrokernelTester()
11218       .mr(2)
11219       .nr(16)
11220       .kr(1)
11221       .sr(1)
11222       .m(2)
11223       .n(16)
11224       .k(8)
11225       .qmin(128)
11226       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11227   }
11228 
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE,qmax)11229   TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, qmax) {
11230     TEST_REQUIRES_ARM_NEON;
11231     GemmMicrokernelTester()
11232       .mr(2)
11233       .nr(16)
11234       .kr(1)
11235       .sr(1)
11236       .m(2)
11237       .n(16)
11238       .k(8)
11239       .qmax(128)
11240       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11241   }
11242 
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE,strided_cm)11243   TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE, strided_cm) {
11244     TEST_REQUIRES_ARM_NEON;
11245     GemmMicrokernelTester()
11246       .mr(2)
11247       .nr(16)
11248       .kr(1)
11249       .sr(1)
11250       .m(2)
11251       .n(16)
11252       .k(8)
11253       .cm_stride(19)
11254       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11255   }
11256 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
11257 
11258 
11259 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,k_eq_8)11260   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
11261     TEST_REQUIRES_ARM_NEON;
11262     GemmMicrokernelTester()
11263       .mr(3)
11264       .nr(8)
11265       .kr(1)
11266       .sr(1)
11267       .m(3)
11268       .n(8)
11269       .k(8)
11270       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11271   }
11272 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,strided_cn)11273   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, strided_cn) {
11274     TEST_REQUIRES_ARM_NEON;
11275     GemmMicrokernelTester()
11276       .mr(3)
11277       .nr(8)
11278       .kr(1)
11279       .sr(1)
11280       .m(3)
11281       .n(8)
11282       .k(8)
11283       .cn_stride(11)
11284       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11285   }
11286 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,k_eq_8_subtile)11287   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
11288     TEST_REQUIRES_ARM_NEON;
11289     for (uint32_t n = 1; n <= 8; n++) {
11290       for (uint32_t m = 1; m <= 3; m++) {
11291         GemmMicrokernelTester()
11292           .mr(3)
11293           .nr(8)
11294           .kr(1)
11295           .sr(1)
11296           .m(m)
11297           .n(n)
11298           .k(8)
11299           .iterations(1)
11300           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11301       }
11302     }
11303   }
11304 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,k_eq_8_subtile_m)11305   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
11306     TEST_REQUIRES_ARM_NEON;
11307     for (uint32_t m = 1; m <= 3; m++) {
11308       GemmMicrokernelTester()
11309         .mr(3)
11310         .nr(8)
11311         .kr(1)
11312         .sr(1)
11313         .m(m)
11314         .n(8)
11315         .k(8)
11316         .iterations(1)
11317         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11318     }
11319   }
11320 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,k_eq_8_subtile_n)11321   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
11322     TEST_REQUIRES_ARM_NEON;
11323     for (uint32_t n = 1; n <= 8; n++) {
11324       GemmMicrokernelTester()
11325         .mr(3)
11326         .nr(8)
11327         .kr(1)
11328         .sr(1)
11329         .m(3)
11330         .n(n)
11331         .k(8)
11332         .iterations(1)
11333         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11334     }
11335   }
11336 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,k_lt_8)11337   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
11338     TEST_REQUIRES_ARM_NEON;
11339     for (size_t k = 1; k < 8; k++) {
11340       GemmMicrokernelTester()
11341         .mr(3)
11342         .nr(8)
11343         .kr(1)
11344         .sr(1)
11345         .m(3)
11346         .n(8)
11347         .k(k)
11348         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11349     }
11350   }
11351 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,k_lt_8_subtile)11352   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
11353     TEST_REQUIRES_ARM_NEON;
11354     for (size_t k = 1; k < 8; k++) {
11355       for (uint32_t n = 1; n <= 8; n++) {
11356         for (uint32_t m = 1; m <= 3; m++) {
11357           GemmMicrokernelTester()
11358             .mr(3)
11359             .nr(8)
11360             .kr(1)
11361             .sr(1)
11362             .m(m)
11363             .n(n)
11364             .k(k)
11365             .iterations(1)
11366             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11367         }
11368       }
11369     }
11370   }
11371 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,k_gt_8)11372   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
11373     TEST_REQUIRES_ARM_NEON;
11374     for (size_t k = 9; k < 16; k++) {
11375       GemmMicrokernelTester()
11376         .mr(3)
11377         .nr(8)
11378         .kr(1)
11379         .sr(1)
11380         .m(3)
11381         .n(8)
11382         .k(k)
11383         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11384     }
11385   }
11386 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,k_gt_8_subtile)11387   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
11388     TEST_REQUIRES_ARM_NEON;
11389     for (size_t k = 9; k < 16; k++) {
11390       for (uint32_t n = 1; n <= 8; n++) {
11391         for (uint32_t m = 1; m <= 3; m++) {
11392           GemmMicrokernelTester()
11393             .mr(3)
11394             .nr(8)
11395             .kr(1)
11396             .sr(1)
11397             .m(m)
11398             .n(n)
11399             .k(k)
11400             .iterations(1)
11401             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11402         }
11403       }
11404     }
11405   }
11406 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,k_div_8)11407   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_div_8) {
11408     TEST_REQUIRES_ARM_NEON;
11409     for (size_t k = 16; k <= 80; k += 8) {
11410       GemmMicrokernelTester()
11411         .mr(3)
11412         .nr(8)
11413         .kr(1)
11414         .sr(1)
11415         .m(3)
11416         .n(8)
11417         .k(k)
11418         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11419     }
11420   }
11421 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,k_div_8_subtile)11422   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
11423     TEST_REQUIRES_ARM_NEON;
11424     for (size_t k = 16; k <= 80; k += 8) {
11425       for (uint32_t n = 1; n <= 8; n++) {
11426         for (uint32_t m = 1; m <= 3; m++) {
11427           GemmMicrokernelTester()
11428             .mr(3)
11429             .nr(8)
11430             .kr(1)
11431             .sr(1)
11432             .m(m)
11433             .n(n)
11434             .k(k)
11435             .iterations(1)
11436             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11437         }
11438       }
11439     }
11440   }
11441 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,n_gt_8)11442   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
11443     TEST_REQUIRES_ARM_NEON;
11444     for (uint32_t n = 9; n < 16; n++) {
11445       for (size_t k = 1; k <= 40; k += 9) {
11446         GemmMicrokernelTester()
11447           .mr(3)
11448           .nr(8)
11449           .kr(1)
11450           .sr(1)
11451           .m(3)
11452           .n(n)
11453           .k(k)
11454           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11455       }
11456     }
11457   }
11458 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,n_gt_8_strided_cn)11459   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
11460     TEST_REQUIRES_ARM_NEON;
11461     for (uint32_t n = 9; n < 16; n++) {
11462       for (size_t k = 1; k <= 40; k += 9) {
11463         GemmMicrokernelTester()
11464           .mr(3)
11465           .nr(8)
11466           .kr(1)
11467           .sr(1)
11468           .m(3)
11469           .n(n)
11470           .k(k)
11471           .cn_stride(11)
11472           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11473       }
11474     }
11475   }
11476 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,n_gt_8_subtile)11477   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
11478     TEST_REQUIRES_ARM_NEON;
11479     for (uint32_t n = 9; n < 16; n++) {
11480       for (size_t k = 1; k <= 40; k += 9) {
11481         for (uint32_t m = 1; m <= 3; m++) {
11482           GemmMicrokernelTester()
11483             .mr(3)
11484             .nr(8)
11485             .kr(1)
11486             .sr(1)
11487             .m(m)
11488             .n(n)
11489             .k(k)
11490             .iterations(1)
11491             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11492         }
11493       }
11494     }
11495   }
11496 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,n_div_8)11497   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_div_8) {
11498     TEST_REQUIRES_ARM_NEON;
11499     for (uint32_t n = 16; n <= 24; n += 8) {
11500       for (size_t k = 1; k <= 40; k += 9) {
11501         GemmMicrokernelTester()
11502           .mr(3)
11503           .nr(8)
11504           .kr(1)
11505           .sr(1)
11506           .m(3)
11507           .n(n)
11508           .k(k)
11509           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11510       }
11511     }
11512   }
11513 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,n_div_8_strided_cn)11514   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
11515     TEST_REQUIRES_ARM_NEON;
11516     for (uint32_t n = 16; n <= 24; n += 8) {
11517       for (size_t k = 1; k <= 40; k += 9) {
11518         GemmMicrokernelTester()
11519           .mr(3)
11520           .nr(8)
11521           .kr(1)
11522           .sr(1)
11523           .m(3)
11524           .n(n)
11525           .k(k)
11526           .cn_stride(11)
11527           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11528       }
11529     }
11530   }
11531 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,n_div_8_subtile)11532   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
11533     TEST_REQUIRES_ARM_NEON;
11534     for (uint32_t n = 16; n <= 24; n += 8) {
11535       for (size_t k = 1; k <= 40; k += 9) {
11536         for (uint32_t m = 1; m <= 3; m++) {
11537           GemmMicrokernelTester()
11538             .mr(3)
11539             .nr(8)
11540             .kr(1)
11541             .sr(1)
11542             .m(m)
11543             .n(n)
11544             .k(k)
11545             .iterations(1)
11546             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11547         }
11548       }
11549     }
11550   }
11551 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,small_kernel)11552   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, small_kernel) {
11553     TEST_REQUIRES_ARM_NEON;
11554     for (size_t k = 1; k <= 40; k += 9) {
11555       GemmMicrokernelTester()
11556         .mr(3)
11557         .nr(8)
11558         .kr(1)
11559         .sr(1)
11560         .m(3)
11561         .n(8)
11562         .k(k)
11563         .ks(3)
11564         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11565     }
11566   }
11567 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,small_kernel_subtile)11568   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
11569     TEST_REQUIRES_ARM_NEON;
11570     for (size_t k = 1; k <= 40; k += 9) {
11571       for (uint32_t n = 1; n <= 8; n++) {
11572         for (uint32_t m = 1; m <= 3; m++) {
11573           GemmMicrokernelTester()
11574             .mr(3)
11575             .nr(8)
11576             .kr(1)
11577             .sr(1)
11578             .m(m)
11579             .n(n)
11580             .k(k)
11581             .ks(3)
11582             .iterations(1)
11583             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11584         }
11585       }
11586     }
11587   }
11588 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,n_gt_8_small_kernel)11589   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
11590     TEST_REQUIRES_ARM_NEON;
11591     for (uint32_t n = 9; n < 16; n++) {
11592       for (size_t k = 1; k <= 40; k += 9) {
11593         GemmMicrokernelTester()
11594           .mr(3)
11595           .nr(8)
11596           .kr(1)
11597           .sr(1)
11598           .m(3)
11599           .n(n)
11600           .k(k)
11601           .ks(3)
11602           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11603       }
11604     }
11605   }
11606 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,n_div_8_small_kernel)11607   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
11608     TEST_REQUIRES_ARM_NEON;
11609     for (uint32_t n = 16; n <= 24; n += 8) {
11610       for (size_t k = 1; k <= 40; k += 9) {
11611         GemmMicrokernelTester()
11612           .mr(3)
11613           .nr(8)
11614           .kr(1)
11615           .sr(1)
11616           .m(3)
11617           .n(n)
11618           .k(k)
11619           .ks(3)
11620           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11621       }
11622     }
11623   }
11624 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,strided_cm_subtile)11625   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
11626     TEST_REQUIRES_ARM_NEON;
11627     for (size_t k = 1; k <= 40; k += 9) {
11628       for (uint32_t n = 1; n <= 8; n++) {
11629         for (uint32_t m = 1; m <= 3; m++) {
11630           GemmMicrokernelTester()
11631             .mr(3)
11632             .nr(8)
11633             .kr(1)
11634             .sr(1)
11635             .m(m)
11636             .n(n)
11637             .k(k)
11638             .cm_stride(11)
11639             .iterations(1)
11640             .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11641         }
11642       }
11643     }
11644   }
11645 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,a_offset)11646   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, a_offset) {
11647     TEST_REQUIRES_ARM_NEON;
11648     for (size_t k = 1; k <= 40; k += 9) {
11649       GemmMicrokernelTester()
11650         .mr(3)
11651         .nr(8)
11652         .kr(1)
11653         .sr(1)
11654         .m(3)
11655         .n(8)
11656         .k(k)
11657         .ks(3)
11658         .a_offset(127)
11659         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11660     }
11661   }
11662 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,zero)11663   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, zero) {
11664     TEST_REQUIRES_ARM_NEON;
11665     for (size_t k = 1; k <= 40; k += 9) {
11666       for (uint32_t mz = 0; mz < 3; mz++) {
11667         GemmMicrokernelTester()
11668           .mr(3)
11669           .nr(8)
11670           .kr(1)
11671           .sr(1)
11672           .m(3)
11673           .n(8)
11674           .k(k)
11675           .ks(3)
11676           .a_offset(127)
11677           .zero_index(mz)
11678           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11679       }
11680     }
11681   }
11682 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,qmin)11683   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, qmin) {
11684     TEST_REQUIRES_ARM_NEON;
11685     GemmMicrokernelTester()
11686       .mr(3)
11687       .nr(8)
11688       .kr(1)
11689       .sr(1)
11690       .m(3)
11691       .n(8)
11692       .k(8)
11693       .qmin(128)
11694       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11695   }
11696 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,qmax)11697   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, qmax) {
11698     TEST_REQUIRES_ARM_NEON;
11699     GemmMicrokernelTester()
11700       .mr(3)
11701       .nr(8)
11702       .kr(1)
11703       .sr(1)
11704       .m(3)
11705       .n(8)
11706       .k(8)
11707       .qmax(128)
11708       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11709   }
11710 
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM,strided_cm)11711   TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE_PRFM, strided_cm) {
11712     TEST_REQUIRES_ARM_NEON;
11713     GemmMicrokernelTester()
11714       .mr(3)
11715       .nr(8)
11716       .kr(1)
11717       .sr(1)
11718       .m(3)
11719       .n(8)
11720       .k(8)
11721       .cm_stride(11)
11722       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11723   }
11724 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
11725 
11726 
11727 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE,k_eq_8)11728   TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_eq_8) {
11729     TEST_REQUIRES_ARM_NEON;
11730     GemmMicrokernelTester()
11731       .mr(3)
11732       .nr(16)
11733       .kr(1)
11734       .sr(1)
11735       .m(3)
11736       .n(16)
11737       .k(8)
11738       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11739   }
11740 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE,strided_cn)11741   TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, strided_cn) {
11742     TEST_REQUIRES_ARM_NEON;
11743     GemmMicrokernelTester()
11744       .mr(3)
11745       .nr(16)
11746       .kr(1)
11747       .sr(1)
11748       .m(3)
11749       .n(16)
11750       .k(8)
11751       .cn_stride(19)
11752       .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11753   }
11754 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE,k_eq_8_subtile)11755   TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_eq_8_subtile) {
11756     TEST_REQUIRES_ARM_NEON;
11757     for (uint32_t n = 1; n <= 16; n++) {
11758       for (uint32_t m = 1; m <= 3; m++) {
11759         GemmMicrokernelTester()
11760           .mr(3)
11761           .nr(16)
11762           .kr(1)
11763           .sr(1)
11764           .m(m)
11765           .n(n)
11766           .k(8)
11767           .iterations(1)
11768           .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11769       }
11770     }
11771   }
11772 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE,k_eq_8_subtile_m)11773   TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
11774     TEST_REQUIRES_ARM_NEON;
11775     for (uint32_t m = 1; m <= 3; m++) {
11776       GemmMicrokernelTester()
11777         .mr(3)
11778         .nr(16)
11779         .kr(1)
11780         .sr(1)
11781         .m(m)
11782         .n(16)
11783         .k(8)
11784         .iterations(1)
11785         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11786     }
11787   }
11788 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE,k_eq_8_subtile_n)11789   TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
11790     TEST_REQUIRES_ARM_NEON;
11791     for (uint32_t n = 1; n <= 16; n++) {
11792       GemmMicrokernelTester()
11793         .mr(3)
11794         .nr(16)
11795         .kr(1)
11796         .sr(1)
11797         .m(3)
11798         .n(n)
11799         .k(8)
11800         .iterations(1)
11801         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11802     }
11803   }
11804 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE,k_lt_8)11805   TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_lt_8) {
11806     TEST_REQUIRES_ARM_NEON;
11807     for (size_t k = 1; k < 8; k++) {
11808       GemmMicrokernelTester()
11809         .mr(3)
11810         .nr(16)
11811         .kr(1)
11812         .sr(1)
11813         .m(3)
11814         .n(16)
11815         .k(k)
11816         .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
11817     }
11818   }
11819 
// 3x16 neon_mlal_lane ukernel: sweeps k over 1..7 with every (m, n) in 1..3 x 1..16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11839 
// 3x16 neon_mlal_lane ukernel: full 3x16 tile, sweeping k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11854 
// 3x16 neon_mlal_lane ukernel: sweeps k over 9..15 with every (m, n) in 1..3 x 1..16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11874 
// 3x16 neon_mlal_lane ukernel: full 3x16 tile, k stepping through 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11889 
// 3x16 neon_mlal_lane ukernel: k stepping through 16, 24, ..., 80 with every (m, n) in 1..3 x 1..16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11909 
// 3x16 neon_mlal_lane ukernel: m = 3, sweeping n over 17..31 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11926 
// 3x16 neon_mlal_lane ukernel: m = 3, n over 17..31, k over 1, 10, 19, 28, 37, with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11944 
// 3x16 neon_mlal_lane ukernel: n over 17..31, k over 1, 10, 19, 28, 37, m over 1..3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11964 
// 3x16 neon_mlal_lane ukernel: m = 3, n in {32, 48}, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11981 
// 3x16 neon_mlal_lane ukernel: m = 3, n in {32, 48}, k over 1, 10, 19, 28, 37, with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11999 
// 3x16 neon_mlal_lane ukernel: n in {32, 48}, k over 1, 10, 19, 28, 37, m over 1..3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12019 
// 3x16 neon_mlal_lane ukernel: full 3x16 tile with ks set to 3, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
12035 
// 3x16 neon_mlal_lane ukernel: ks set to 3, k over 1, 10, 19, 28, 37, every (m, n) in 1..3 x 1..16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12056 
// 3x16 neon_mlal_lane ukernel: m = 3, n over 17..31, k over 1, 10, 19, 28, 37, with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12074 
// 3x16 neon_mlal_lane ukernel: m = 3, n in {32, 48}, k over 1, 10, 19, 28, 37, with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12092 
// 3x16 neon_mlal_lane ukernel: cm_stride set to 19, k over 1, 10, 19, 28, 37, every (m, n) in 1..3 x 1..16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12113 
// 3x16 neon_mlal_lane ukernel: full 3x16 tile with ks set to 3 and a_offset set to 127, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
12130 
// 3x16 neon_mlal_lane ukernel: full 3x16 tile, ks = 3, a_offset = 127, sweeping zero_index over 0..2 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12150 
// 3x16 neon_mlal_lane ukernel: full 3x16 tile, k = 8, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
12164 
// 3x16 neon_mlal_lane ukernel: full 3x16 tile, k = 8, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
12178 
// 3x16 neon_mlal_lane ukernel: full 3x16 tile, k = 8, with cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
12192 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
12193 
12194 
12195 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 3x16 neon_mlal_lane_prfm ukernel: full 3x16 tile with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
12208 
// 3x16 neon_mlal_lane_prfm ukernel: full 3x16 tile, k = 8, with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
12222 
// 3x16 neon_mlal_lane_prfm ukernel: k = 8, sweeping every (m, n) in 1..3 x 1..16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12240 
// 3x16 neon_mlal_lane_prfm ukernel: k = 8, n = 16, sweeping m over 1..3 (one iteration per configuration).
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
12256 
// 3x16 neon_mlal_lane_prfm ukernel: k = 8, m = 3, sweeping n over 1..16 (one iteration per configuration).
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
12272 
// 3x16 neon_mlal_lane_prfm ukernel: full 3x16 tile, sweeping k over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
12287 
// 3x16 neon_mlal_lane_prfm ukernel: sweeps k over 1..7 with every (m, n) in 1..3 x 1..16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12307 
// 3x16 neon_mlal_lane_prfm ukernel: full 3x16 tile, sweeping k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
12322 
// 3x16 neon_mlal_lane_prfm ukernel: sweeps k over 9..15 with every (m, n) in 1..3 x 1..16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12342 
// 3x16 neon_mlal_lane_prfm ukernel: full 3x16 tile, k stepping through 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
12357 
// 3x16 neon_mlal_lane_prfm ukernel: k stepping through 16, 24, ..., 80 with every (m, n) in 1..3 x 1..16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12377 
// 3x16 neon_mlal_lane_prfm ukernel: m = 3, sweeping n over 17..31 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12394 
// 3x16 neon_mlal_lane_prfm ukernel: m = 3, n over 17..31, k over 1, 10, 19, 28, 37, with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12412 
// 3x16 neon_mlal_lane_prfm ukernel: n over 17..31, k over 1, 10, 19, 28, 37, m over 1..3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12432 
// 3x16 neon_mlal_lane_prfm ukernel: m = 3, n in {32, 48}, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12449 
// 3x16 neon_mlal_lane_prfm ukernel: m = 3, n in {32, 48}, k over 1, 10, 19, 28, 37, with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12467 
// 3x16 neon_mlal_lane_prfm ukernel: n in {32, 48}, k over 1, 10, 19, 28, 37, m over 1..3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12487 
// 3x16 neon_mlal_lane_prfm ukernel: full 3x16 tile with ks set to 3, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
12503 
// 3x16 neon_mlal_lane_prfm ukernel: ks set to 3, k over 1, 10, 19, 28, 37, every (m, n) in 1..3 x 1..16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12524 
// 3x16 neon_mlal_lane_prfm ukernel: m = 3, n over 17..31, k over 1, 10, 19, 28, 37, with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12542 
// 3x16 neon_mlal_lane_prfm ukernel: m = 3, n in {32, 48}, k over 1, 10, 19, 28, 37, with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12560 
// 3x16 neon_mlal_lane_prfm ukernel: cm_stride set to 19, k over 1, 10, 19, 28, 37, every (m, n) in 1..3 x 1..16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12581 
// 3x16 neon_mlal_lane_prfm ukernel: full 3x16 tile with ks set to 3 and a_offset set to 127, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
12598 
// 3x16 neon_mlal_lane_prfm ukernel: full 3x16 tile, ks = 3, a_offset = 127, sweeping zero_index over 0..2 and k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(3)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12618 
// 3x16 neon_mlal_lane_prfm ukernel: full 3x16 tile, k = 8, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
12632 
// 3x16 neon_mlal_lane_prfm ukernel: full 3x16 tile, k = 8, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
12646 
// 3x16 neon_mlal_lane_prfm ukernel: full 3x16 tile, k = 8, with cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(3)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
12660 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
12661 
12662 
12663 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_eq_8) {
  // Full 3x16 tile (m == mr, n == nr) at k = 8.
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12676 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, strided_cn) {
  // Full 3x16 tile at k = 8 with cn_stride(19), i.e. a stride larger than nr.
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12690 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  // Sweeps n over 1..16 and m over 1..3 at k = 8, with iterations(1) per shape.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nn = 1; nn <= 16; ++nn) {
    for (uint32_t mm = 1; mm <= 3; ++mm) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(mm).n(nn).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12708 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  // Sweeps m over 1..3 with n fixed at 16 and k = 8, with iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mm = 1; mm <= 3; ++mm) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(mm).n(16).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12724 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  // Sweeps n over 1..16 with m fixed at 3 and k = 8, with iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nn = 1; nn <= 16; ++nn) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(nn).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12740 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_lt_8) {
  // Full 3x16 tile for every k in 1..7.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kk);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12755 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  // For every k in 1..7, sweeps n over 1..16 and m over 1..3 with iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 16; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12775 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_gt_8) {
  // Full 3x16 tile for every k in 9..15.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kk);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12790 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  // For every k in 9..15, sweeps n over 1..16 and m over 1..3 with iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 16; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12810 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_div_8) {
  // Full 3x16 tile for k = 16, 24, ..., 80 (multiples of 8).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kk);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12825 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  // For k = 16, 24, ..., 80, sweeps n over 1..16 and m over 1..3 with iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 16; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12845 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_gt_16) {
  // n in 17..31 (above nr) with m = 3, for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nn).k(kk);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12862 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
  // n in 17..31 with m = 3 and cn_stride(19), for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nn).k(kk);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12880 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
  // n in 17..31, k = 1, 10, 19, 28, 37, and m swept over 1..3 with iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12900 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_div_16) {
  // n = 32 and 48 (multiples of nr) with m = 3, for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nn = 32; nn <= 48; nn += 16) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nn).k(kk);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12917 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
  // n = 32 and 48 with m = 3 and cn_stride(19), for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nn = 32; nn <= 48; nn += 16) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nn).k(kk);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12935 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
  // n = 32 and 48, k = 1, 10, 19, 28, 37, and m swept over 1..3 with iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nn = 32; nn <= 48; nn += 16) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12955 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, small_kernel) {
  // Full 3x16 tile with ks(3), for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kk);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12971 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, small_kernel_subtile) {
  // ks(3) with k = 1, 10, 19, 28, 37; n swept over 1..16 and m over 1..3, iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 16; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12992 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_gt_16_small_kernel) {
  // n in 17..31 with m = 3 and ks(3), for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nn).k(kk);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13010 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, n_div_16_small_kernel) {
  // n = 32 and 48 with m = 3 and ks(3), for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nn = 32; nn <= 48; nn += 16) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(nn).k(kk);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13028 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
  // cm_stride(19) with k = 1, 10, 19, 28, 37; n swept over 1..16 and m over 1..3, iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 16; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.cm_stride(19);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13049 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, a_offset) {
  // Full 3x16 tile with ks(3) and a_offset(127), for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(kk);
    tester.ks(3).a_offset(127);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13066 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, zero) {
  // Full 3x16 tile with ks(3) and a_offset(127); zero_index swept over 0..2
  // for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(16).k(kk);
      tester.ks(3).a_offset(127).zero_index(zi);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13086 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, qmin) {
  // Full 3x16 tile (m == mr, n == nr) at k = 8 with qmin(128).
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13100 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, qmax) {
  // Full 3x16 tile (m == mr, n == nr) at k = 8 with qmax(128).
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13114 
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE, strided_cm) {
  // Full 3x16 tile at k = 8 with cm_stride(19), i.e. a stride larger than nr.
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13128 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
13129 
13130 
13131 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8) {
  // Full 4x8 tile (m == mr, n == nr) at k = 8.
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13144 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, strided_cn) {
  // Full 4x8 tile at k = 8 with cn_stride(11), i.e. a stride larger than nr.
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13158 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  // Sweeps n over 1..8 and m over 1..4 at k = 8, with iterations(1) per shape.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 4; ++mm) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(mm).n(nn).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13176 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  // Sweeps m over 1..4 with n fixed at 8 and k = 8, with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mm = 1; mm <= 4; ++mm) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(mm).n(8).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13192 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  // Sweeps n over 1..8 with m fixed at 4 and k = 8, with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(nn).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13208 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_lt_8) {
  // Full 4x8 tile for every k in 1..7.
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kk);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13223 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  // For every k in 1..7, sweeps n over 1..8 and m over 1..4 with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13243 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_gt_8) {
  // Full 4x8 tile for every k in 9..15.
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kk);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13258 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  // For every k in 9..15, sweeps n over 1..8 and m over 1..4 with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13278 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_div_8) {
  // Full 4x8 tile for k = 16, 24, ..., 80 (multiples of 8).
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kk);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13293 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, k_div_8_subtile) {
  // For k = 16, 24, ..., 80, sweeps n over 1..8 and m over 1..4 with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13313 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8) {
  // n in 9..15 (above nr) with m = 4, for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nn).k(kk);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13330 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  // n in 9..15 with m = 4 and cn_stride(11), for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nn).k(kk);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13348 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  // n in 9..15, k = 1, 10, 19, 28, 37, and m swept over 1..4 with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13368 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8) {
  // n = 16 and 24 (multiples of nr) with m = 4, for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nn).k(kk);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13385 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  // n = 16 and 24 with m = 4 and cn_stride(11), for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nn).k(kk);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13403 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8_subtile) {
  // n = 16 and 24, k = 1, 10, 19, 28, 37, and m swept over 1..4 with iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13423 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, small_kernel) {
  // Full 4x8 tile with ks(3), for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(kk);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13439 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, small_kernel_subtile) {
  // ks(3) with k = 1, 10, 19, 28, 37; n swept over 1..8 and m over 1..4, iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13460 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  // n in 9..15 with m = 4 and ks(3), for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nn).k(kk);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13478 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  // n = 16 and 24 with m = 4 and ks(3), for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(nn).k(kk);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13496 
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, strided_cm_subtile) {
  // cm_stride(11) with k = 1, 10, 19, 28, 37; n swept over 1..8 and m over 1..4, iterations(1).
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13517 
// Full 4x8 tile for k in {1, 10, 19, 28, 37}, passing ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
13534 
// Full 4x8 tile for k in {1, 10, 19, 28, 37} with ks(3) and a_offset(163),
// repeated for each zero_index value 0..3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 3; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13554 
// Single full 4x8 tile at k=8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
13568 
// Single full 4x8 tile at k=8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
13582 
// Single full 4x8 tile at k=8 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
13596 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
13597 
13598 
13599 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single full 4x8 tile at k=8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
13612 
// Single full 4x8 tile at k=8 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
13626 
// Every m in 1..4 and n in 1..8 at k=8, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13644 
// Every m in 1..4 with n=8 and k=8, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
      .m(rows).n(8).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13660 
// Every n in 1..8 with m=4 and k=8, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(cols).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13676 
// Full 4x8 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13691 
// Every m in 1..4 and n in 1..8 for each k in 1..7, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13711 
// Full 4x8 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13726 
// Every m in 1..4 and n in 1..8 for each k in 9..15, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13746 
// Full 4x8 tile for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13761 
// Every m in 1..4 and n in 1..8 for k = 16, 24, ..., 80, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13781 
// Full m=4 tile with n in 9..15 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13798 
// Full m=4 tile with n in 9..15 and k in {1, 10, 19, 28, 37}, using cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13816 
// Every m in 1..4 for n in 9..15 and k in {1, 10, 19, 28, 37}, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13836 
// Full m=4 tile with n in {16, 24} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13853 
// Full m=4 tile with n in {16, 24} and k in {1, 10, 19, 28, 37}, using cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13871 
// Every m in 1..4 for n in {16, 24} and k in {1, 10, 19, 28, 37}, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13891 
// Full 4x8 tile for k in {1, 10, 19, 28, 37}, passing ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13907 
// Every m in 1..4 and n in 1..8 for k in {1, 10, 19, 28, 37}, passing ks(3),
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13928 
// Full m=4 tile with n in 9..15 and k in {1, 10, 19, 28, 37}, passing ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13946 
// Full m=4 tile with n in {16, 24} and k in {1, 10, 19, 28, 37}, passing ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13964 
// Every m in 1..4 and n in 1..8 for k in {1, 10, 19, 28, 37}, with cm_stride(11)
// and a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13985 
// Full 4x8 tile for k in {1, 10, 19, 28, 37}, passing ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(depth)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14002 
// Full 4x8 tile for k in {1, 10, 19, 28, 37} with ks(3) and a_offset(163),
// repeated for each zero_index value 0..3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 3; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14022 
// Single full 4x8 tile at k=8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14036 
// Single full 4x8 tile at k=8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14050 
// Single full 4x8 tile at k=8 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14064 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
14065 
14066 
14067 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single full 4x16 tile at k=8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14080 
// Single full 4x16 tile at k=8 with cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
14094 
// Every m in 1..4 and n in 1..16 at k=8, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14112 
// Every m in 1..4 with n=16 and k=8, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
      .m(rows).n(16).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14128 
// Every n in 1..16 with m=4 and k=8, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(cols).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14144 
// Full 4x16 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14159 
// Every m in 1..4 and n in 1..16 for each k in 1..7, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14179 
// Full 4x16 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14194 
// Every m in 1..4 and n in 1..16 for each k in 9..15, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14214 
// Full 4x16 tile for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
14229 
// Every m in 1..4 and n in 1..16 for k = 16, 24, ..., 80, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14249 
// Full m=4 tile with n in 17..31 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 17; cols <= 31; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14266 
// Full m=4 tile with n in 17..31 and k in {1, 10, 19, 28, 37}, using cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 17; cols <= 31; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14284 
// Every m in 1..4 for n in 17..31 and k in {1, 10, 19, 28, 37}, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 17; cols <= 31; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14304 
// Full m=4 tile with n in {32, 48} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14321 
// Full m=4 tile with n in {32, 48} and k in {1, 10, 19, 28, 37}, using cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
14339 
// Every m in 1..4 for n in {32, 48} and k in {1, 10, 19, 28, 37}, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
14359 
// small_kernel: m=4, n=16 with ks(3), sweeping k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14375 
// small_kernel_subtile: ks(3) with k in {1, 10, 19, 28, 37}, every n in 1..16 and
// every m in 1..4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14396 
// n_gt_16_small_kernel: m=4 with ks(3), n in 17..31 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14414 
// n_div_16_small_kernel: m=4 with ks(3), n in {32, 48} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14432 
// strided_cm_subtile: cm_stride(19) with k in {1, 10, 19, 28, 37}, every n in 1..16
// and every m in 1..4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14453 
// a_offset: m=4, n=16 with ks(3) and a_offset(163), sweeping k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14470 
// zero: m=4, n=16 with ks(3) and a_offset(163); sweeps k over {1, 10, 19, 28, 37}
// and zero_index(mz) over mz in 0..3.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14490 
// qmin: single m=4, n=16, k=8 configuration with qmin(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14504 
// qmax: single m=4, n=16, k=8 configuration with qmax(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14518 
// strided_cm: single m=4, n=16, k=8 configuration with cm_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14532 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
14533 
14534 
14535 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_8: single m=4, n=16, k=8 configuration for the 4x16 neonv8 mlal_lane_prfm kernel.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14548 
// strided_cn: single m=4, n=16, k=8 configuration with cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14562 
// k_eq_8_subtile: k=8 with every n in 1..16 and every m in 1..4;
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14580 
// k_eq_8_subtile_m: k=8, n=16 with every m in 1..4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14596 
// k_eq_8_subtile_n: k=8, m=4 with every n in 1..16; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14612 
// k_lt_8: m=4, n=16 with every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14627 
// k_lt_8_subtile: every k in 1..7, every n in 1..16 and every m in 1..4;
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14647 
// k_gt_8: m=4, n=16 with every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14662 
// k_gt_8_subtile: every k in 9..15, every n in 1..16 and every m in 1..4;
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14682 
// k_div_8: m=4, n=16 with k stepping by 8 from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14697 
// k_div_8_subtile: k stepping by 8 from 16 through 80, every n in 1..16 and
// every m in 1..4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14717 
// n_gt_16: m=4 with every n in 17..31 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14734 
// n_gt_16_strided_cn: m=4 with cn_stride(19), every n in 17..31 and
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14752 
// n_gt_16_subtile: every n in 17..31, k in {1, 10, 19, 28, 37} and every m in 1..4;
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14772 
// n_div_16: m=4 with n in {32, 48} (stepping by 16) and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14789 
// n_div_16_strided_cn: m=4 with cn_stride(19), n in {32, 48} and
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14807 
// n_div_16_subtile: n in {32, 48}, k in {1, 10, 19, 28, 37}, and every m in 1..4;
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14827 
// small_kernel: m=4, n=16 with ks(3), sweeping k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14843 
// small_kernel_subtile: ks(3) with k in {1, 10, 19, 28, 37}, every n in 1..16 and
// every m in 1..4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14864 
// n_gt_16_small_kernel: m=4 with ks(3), every n in 17..31 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14882 
// n_div_16_small_kernel: m=4 with ks(3), n in {32, 48} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14900 
// strided_cm_subtile: cm_stride(19) with k in {1, 10, 19, 28, 37}, every n in 1..16
// and every m in 1..4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14921 
// a_offset: m=4, n=16 with ks(3) and a_offset(163), sweeping k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14938 
// zero: m=4, n=16 with ks(3) and a_offset(163); sweeps k over {1, 10, 19, 28, 37}
// and zero_index(mz) over mz in 0..3.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14958 
// qmin: single m=4, n=16, k=8 configuration with qmin(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14972 
// qmax: single m=4, n=16, k=8 configuration with qmax(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14986 
// strided_cm: single m=4, n=16, k=8 configuration with cm_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15000 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
15001 
15002 
15003 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// k_eq_8: single m=4, n=16, k=8 configuration for the 4x16c4 neondot kernel (kr=4).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15016 
// strided_cn: single m=4, n=16, k=8 configuration with cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(4)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15030 
// k_eq_8_subtile: k=8 with every n in 1..16 and every m in 1..4;
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15048 
// k_eq_8_subtile_m: k=8, n=16 with every m in 1..4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15064 
// k_eq_8_subtile_n: k=8, m=4 with every n in 1..16; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15080 
// k_lt_8: m=4, n=16 with every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15095 
// k_lt_8_subtile: every k in 1..7, every n in 1..16 and every m in 1..4;
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15115 
// k_gt_8: m=4, n=16 with every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15130 
// k_gt_8_subtile: every k in 9..15, every n in 1..16 and every m in 1..4;
// each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15150 
// k_div_8: m=4, n=16 with k stepping by 8 from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15165 
// k_div_8_subtile: k stepping by 8 from 16 through 80, every n in 1..16 and
// every m in 1..4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15185 
// 4x16c4 neondot kernel: n in 17..31 with m = 4, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc <= 31; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15202 
// 4x16c4 neondot kernel: n in 17..31 with m = 4 and cn_stride set to 19,
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc <= 31; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15220 
// 4x16c4 neondot kernel: n in 17..31, k = 1, 10, 19, 28, 37, and m in 1..4,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc <= 31; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15240 
// 4x16c4 neondot kernel: n = 32 and 48 with m = 4, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15257 
// 4x16c4 neondot kernel: n = 32 and 48 with m = 4 and cn_stride set to 19,
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(19)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15275 
// 4x16c4 neondot kernel: n = 32 and 48, k = 1, 10, 19, 28, 37, and m in 1..4,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15295 
// 4x16c4 neondot kernel: full 4x16 tile with ks set to 3,
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(kc)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
15311 
// 4x16c4 neondot kernel: ks set to 3 over k = 1, 10, 19, 28, 37 and every
// sub-tile with n in 1..16 and m in 1..4, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15332 
// 4x16c4 neondot kernel: n in 17..31 with m = 4 and ks set to 3,
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 17; nc <= 31; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15350 
// 4x16c4 neondot kernel: n = 32 and 48 with m = 4 and ks set to 3,
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15368 
// 4x16c4 neondot kernel: cm_stride set to 19 over k = 1, 10, 19, 28, 37 and
// every sub-tile with n in 1..16 and m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15389 
// 4x16c4 neondot kernel: full 4x16 tile with ks set to 3 and a_offset set
// to 163, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
15406 
// 4x16c4 neondot kernel: full 4x16 tile with ks = 3 and a_offset = 163,
// passing each index 0..3 to zero_index(), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_row = 0; zero_row <= 3; ++zero_row) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_row)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15426 
// 4x16c4 neondot kernel: full 4x16 tile, k = 8, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
15440 
// 4x16c4 neondot kernel: full 4x16 tile, k = 8, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
15454 
// 4x16c4 neondot kernel: full 4x16 tile, k = 8, with cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
15468 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
15469 
15470 
15471 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 6x8 neon_mlal_lane_prfm kernel: full 6x8 tile with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15484 
// 6x8 neon_mlal_lane_prfm kernel: full 6x8 tile, k = 8, cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15498 
// 6x8 neon_mlal_lane_prfm kernel: k = 8 over every sub-tile with n in 1..8
// and m in 1..6, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15516 
// 6x8 neon_mlal_lane_prfm kernel: k = 8, n = 8, with m in 1..6,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15532 
// 6x8 neon_mlal_lane_prfm kernel: k = 8, m = 6, with n in 1..8,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15548 
// 6x8 neon_mlal_lane_prfm kernel: full 6x8 tile for each k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 7; ++kc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15563 
// 6x8 neon_mlal_lane_prfm kernel: for each k in 1..7, runs every sub-tile
// with n in 1..8 and m in 1..6, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15583 
// 6x8 neon_mlal_lane_prfm kernel: full 6x8 tile for each k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc <= 15; ++kc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15598 
// 6x8 neon_mlal_lane_prfm kernel: for each k in 9..15, runs every sub-tile
// with n in 1..8 and m in 1..6, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15618 
// 6x8 neon_mlal_lane_prfm kernel: full 6x8 tile for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15633 
// 6x8 neon_mlal_lane_prfm kernel: for k = 16, 24, ..., 80, runs every
// sub-tile with n in 1..8 and m in 1..6, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15653 
// 6x8 neon_mlal_lane_prfm kernel: n in 9..15 with m = 6,
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15670 
// 6x8 neon_mlal_lane_prfm kernel: n in 9..15 with m = 6 and cn_stride set
// to 11, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15688 
// 6x8 neon_mlal_lane_prfm kernel: n in 9..15, k = 1, 10, 19, 28, 37, and
// m in 1..6, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15708 
// 6x8 neon_mlal_lane_prfm kernel: n = 16 and 24 with m = 6,
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15725 
// 6x8 neon_mlal_lane_prfm kernel: n = 16 and 24 with m = 6 and cn_stride
// set to 11, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15743 
// 6x8 neon_mlal_lane_prfm kernel: n = 16 and 24, k = 1, 10, 19, 28, 37, and
// m in 1..6, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15763 
// 6x8 neon_mlal_lane_prfm kernel: full 6x8 tile with ks set to 3,
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kc)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15779 
// 6x8 neon_mlal_lane_prfm kernel: ks set to 3 over k = 1, 10, 19, 28, 37 and
// every sub-tile with n in 1..8 and m in 1..6, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15800 
// 6x8 neon_mlal_lane_prfm kernel: n in 9..15 with m = 6 and ks set to 3,
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15818 
// 6x8 neon_mlal_lane_prfm kernel: n = 16 and 24 with m = 6 and ks set to 3,
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15836 
// 6x8 neon_mlal_lane_prfm kernel: cm_stride set to 11 over
// k = 1, 10, 19, 28, 37 and every sub-tile with n in 1..8 and m in 1..6,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15857 
// 6x8 neon_mlal_lane_prfm kernel: full 6x8 tile with ks set to 3 and
// a_offset set to 251, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kc)
      .ks(3)
      .a_offset(251)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15874 
// 6x8 neon_mlal_lane_prfm kernel: full 6x8 tile with ks = 3 and
// a_offset = 251, passing each index 0..5 to zero_index(),
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_row = 0; zero_row <= 5; ++zero_row) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .a_offset(251)
        .zero_index(zero_row)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15894 
// 6x8 neon_mlal_lane_prfm kernel: full 6x8 tile, k = 8, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15908 
// 6x8 neon_mlal_lane_prfm kernel: full 6x8 tile, k = 8, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15922 
// 6x8 neon_mlal_lane_prfm kernel: full 6x8 tile, k = 8, cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15936 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
15937 
15938 
15939 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 6x8 neonv8_mlal_lane kernel: full 6x8 tile with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
15952 
// 6x8 neonv8_mlal_lane kernel: full 6x8 tile, k = 8, cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
15966 
// 6x8 neonv8_mlal_lane kernel: k = 8 over every sub-tile with n in 1..8 and
// m in 1..6, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15984 
// 6x8 neonv8_mlal_lane kernel: k = 8, n = 8, with m in 1..6,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16000 
// 6x8 neonv8_mlal_lane kernel: k = 8, m = 6, with n in 1..8,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16016 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_lt_8) {
  // Full tile (m = 6, n = 8) for each k in [1, 7].
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16031 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  // Each k in [1, 7] crossed with every (m, n) in [1, 6] x [1, 8], using iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16051 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_gt_8) {
  // Full tile (m = 6, n = 8) for each k in [9, 15].
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16066 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  // Each k in [9, 15] crossed with every (m, n) in [1, 6] x [1, 8], using iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16086 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_div_8) {
  // Full tile (m = 6, n = 8) for k = 16, 24, ..., 80.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16101 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_div_8_subtile) {
  // k = 16, 24, ..., 80 crossed with every (m, n) in [1, 6] x [1, 8], using iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16121 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_gt_8) {
  // m = 6; n in [9, 15] crossed with k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16138 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_gt_8_strided_cn) {
  // m = 6, cn_stride(11); n in [9, 15] crossed with k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16156 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_gt_8_subtile) {
  // n in [9, 15], k = 1, 10, 19, 28, 37, m in [1, 6]; each combination uses iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16176 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_div_8) {
  // m = 6; n = 16, 24 crossed with k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16193 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_div_8_strided_cn) {
  // m = 6, cn_stride(11); n = 16, 24 crossed with k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16211 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_div_8_subtile) {
  // n = 16, 24, k = 1, 10, 19, 28, 37, m in [1, 6]; each combination uses iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16231 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, small_kernel) {
  // Full tile (m = 6, n = 8) with ks(3) for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16247 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, small_kernel_subtile) {
  // ks(3); k = 1, 10, 19, 28, 37 crossed with every (m, n) in [1, 6] x [1, 8], using iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16268 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_gt_8_small_kernel) {
  // m = 6, ks(3); n in [9, 15] crossed with k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16286 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_div_8_small_kernel) {
  // m = 6, ks(3); n = 16, 24 crossed with k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16304 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, strided_cm_subtile) {
  // cm_stride(11); k = 1, 10, 19, 28, 37 crossed with every (m, n) in [1, 6] x [1, 8], using iterations(1).
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(1).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16325 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, a_offset) {
  // Full tile (m = 6, n = 8) with ks(3) and a_offset(251) for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(1).sr(1);
    tester.m(6).n(8).k(kc);
    tester.ks(3).a_offset(251);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16342 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, zero) {
  // Full tile with ks(3) and a_offset(251); k = 1, 10, 19, 28, 37 crossed with zero_index in [0, 5].
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(1).sr(1);
      tester.m(6).n(8).k(kc);
      tester.ks(3).a_offset(251).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16362 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, qmin) {
  // Full tile (m = 6, n = 8) at k = 8 with qmin(128).
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16376 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, qmax) {
  // Full tile (m = 6, n = 8) at k = 8 with qmax(128).
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16390 
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, strided_cm) {
  // Full tile (m = 6, n = 8) at k = 8 with cm_stride(11).
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(1).sr(1);
  tester.m(6).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16404 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
16405 
16406 
16407 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_eq_8) {
  // Full tile (m = 6, n = 8) at k = 8; no other tester options are set.
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(4).sr(1);
  tester.m(6).n(8).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16420 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, strided_cn) {
  // Full tile (m = 6, n = 8) at k = 8 with cn_stride(11).
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(4).sr(1);
  tester.m(6).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16434 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_eq_8_subtile) {
  // k = 8; every (m, n) with m in [1, 6] and n in [1, 8], using iterations(1).
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(4).sr(1);
      tester.m(mc).n(nc).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16452 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_eq_8_subtile_m) {
  // k = 8 and n = 8; m runs over [1, 6], using iterations(1).
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(4).sr(1);
    tester.m(mc).n(8).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16468 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_eq_8_subtile_n) {
  // k = 8 and m = 6; n runs over [1, 8], using iterations(1).
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(4).sr(1);
    tester.m(6).n(nc).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16484 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_lt_8) {
  // Full tile (m = 6, n = 8) for each k in [1, 7].
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(4).sr(1);
    tester.m(6).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16499 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_lt_8_subtile) {
  // Each k in [1, 7] crossed with every (m, n) in [1, 6] x [1, 8], using iterations(1).
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16519 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_gt_8) {
  // Full tile (m = 6, n = 8) for each k in [9, 15].
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(4).sr(1);
    tester.m(6).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16534 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_gt_8_subtile) {
  // Each k in [9, 15] crossed with every (m, n) in [1, 6] x [1, 8], using iterations(1).
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16554 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_div_8) {
  // Full tile (m = 6, n = 8) for k = 16, 24, ..., 80.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(4).sr(1);
    tester.m(6).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16569 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, k_div_8_subtile) {
  // k = 16, 24, ..., 80 crossed with every (m, n) in [1, 6] x [1, 8], using iterations(1).
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16589 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, n_gt_8) {
  // m = 6; n in [9, 15] crossed with k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(4).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16606 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, n_gt_8_strided_cn) {
  // m = 6, cn_stride(11); n in [9, 15] crossed with k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(4).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16624 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, n_gt_8_subtile) {
  // n in [9, 15], k = 1, 10, 19, 28, 37, m in [1, 6]; each combination uses iterations(1).
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16644 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, n_div_8) {
  // m = 6; n = 16, 24 crossed with k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(4).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16661 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, n_div_8_strided_cn) {
  // m = 6, cn_stride(11); n = 16, 24 crossed with k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(4).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16679 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, n_div_8_subtile) {
  // n = 16, 24, k = 1, 10, 19, 28, 37, m in [1, 6]; each combination uses iterations(1).
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16699 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, small_kernel) {
  // Full tile (m = 6, n = 8) with ks(3) for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(4).sr(1);
    tester.m(6).n(8).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16715 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, small_kernel_subtile) {
  // ks(3); k = 1, 10, 19, 28, 37 crossed with every (m, n) in [1, 6] x [1, 8], using iterations(1).
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16736 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, n_gt_8_small_kernel) {
  // m = 6, ks(3); n in [9, 15] crossed with k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(4).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16754 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, n_div_8_small_kernel) {
  // m = 6, ks(3); n = 16, 24 crossed with k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(4).sr(1);
      tester.m(6).n(nc).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16772 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, strided_cm_subtile) {
  // cm_stride(11); k = 1, 10, 19, 28, 37 crossed with every (m, n) in [1, 6] x [1, 8], using iterations(1).
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(8).kr(4).sr(1);
        tester.m(mc).n(nc).k(kc);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16793 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, a_offset) {
  // Full tile (m = 6, n = 8) with ks(3) and a_offset(251) for k = 1, 10, 19, 28, 37.
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(8).kr(4).sr(1);
    tester.m(6).n(8).k(kc);
    tester.ks(3).a_offset(251);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16810 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, zero) {
  // Full tile with ks(3) and a_offset(251); k = 1, 10, 19, 28, 37 crossed with zero_index in [0, 5].
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(8).kr(4).sr(1);
      tester.m(6).n(8).k(kc);
      tester.ks(3).a_offset(251).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16830 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, qmin) {
  // Full tile (m = 6, n = 8) at k = 8 with qmin(128).
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(4).sr(1);
  tester.m(6).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16844 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, qmax) {
  // Full tile (m = 6, n = 8) at k = 8 with qmax(128).
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(4).sr(1);
  tester.m(6).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16858 
TEST(QC8_IGEMM_MINMAX_FP32_6X8C4__NEONDOT, strided_cm) {
  // Full tile (m = 6, n = 8) at k = 8 with cm_stride(11).
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(8).kr(4).sr(1);
  tester.m(6).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16872 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
16873 
16874 
16875 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 6x16 tile (m = 6, n = 16) at k = 8; all other tester settings are left
// at their defaults.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(16).kr(1).sr(1);
  tester.m(6).n(16).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16888 
// Full 6x16 tile at k = 8 with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(16).kr(1).sr(1);
  tester.m(6).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16902 
// At k = 8, covers every (n, m) pair with n in 1..16 and m in 1..6, using a
// single tester iteration per pair.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(1).sr(1);
      tester.m(m_val).n(n_val).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16920 
// At k = 8 and n = 16, sweeps m over 1..6 with one tester iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(1).sr(1);
    tester.m(m_val).n(16).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16936 
// At k = 8 and m = 6, sweeps n over 1..16 with one tester iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(1).sr(1);
    tester.m(6).n(n_val).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16952 
// Full 6x16 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(1).sr(1);
    tester.m(6).n(16).k(k_val);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16967 
// For every k in 1..7, covers all (n, m) pairs with n in 1..16 and m in 1..6,
// one tester iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16987 
// Full 6x16 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(1).sr(1);
    tester.m(6).n(16).k(k_val);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
17002 
// For every k in 9..15, covers all (n, m) pairs with n in 1..16 and m in 1..6,
// one tester iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17022 
// Full 6x16 tile for k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(1).sr(1);
    tester.m(6).n(16).k(k_val);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
17037 
// For k = 16, 24, ..., 80, covers all (n, m) pairs with n in 1..16 and m in
// 1..6, one tester iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17057 
// With m = 6, sweeps n over 17..31 and, for each n, k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(1).sr(1);
      tester.m(6).n(n_val).k(k_val);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17074 
// With m = 6 and cn_stride(19), sweeps n over 17..31 and, for each n, k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(1).sr(1);
      tester.m(6).n(n_val).k(k_val);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17092 
// Sweeps n over 17..31, k over {1, 10, 19, 28, 37} and m over 1..6, with one
// tester iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17112 
// With m = 6, runs n = 32 and n = 48 and, for each n, k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(1).sr(1);
      tester.m(6).n(n_val).k(k_val);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17129 
// With m = 6 and cn_stride(19), runs n = 32 and n = 48 and, for each n, k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(1).sr(1);
      tester.m(6).n(n_val).k(k_val);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17147 
// Runs n = 32 and n = 48, k over {1, 10, 19, 28, 37} and m over 1..6, with one
// tester iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17167 
// Full 6x16 tile with ks(3), for k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(1).sr(1);
    tester.m(6).n(16).k(k_val);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
17183 
// With ks(3), sweeps k over {1, 10, 19, 28, 37}, n over 1..16 and m over 1..6,
// one tester iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17204 
// With m = 6 and ks(3), sweeps n over 17..31 and, for each n, k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(1).sr(1);
      tester.m(6).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17222 
// With m = 6 and ks(3), runs n = 32 and n = 48 and, for each n, k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(1).sr(1);
      tester.m(6).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17240 
// With cm_stride(19), sweeps k over {1, 10, 19, 28, 37}, n over 1..16 and m
// over 1..6, one tester iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(16).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(19).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17261 
// Full 6x16 tile with ks(3) and a_offset(251), for k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(1).sr(1);
    tester.m(6).n(16).k(k_val);
    tester.ks(3).a_offset(251);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
17278 
// Full 6x16 tile with ks(3) and a_offset(251): for k in {1, 10, 19, 28, 37},
// runs the kernel once per zero_index value 0..5.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t z_idx = 0; z_idx <= 5; ++z_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(1).sr(1);
      tester.m(6).n(16).k(k_val);
      tester.ks(3).a_offset(251).zero_index(z_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17298 
// Full 6x16 tile at k = 8 with the tester's qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(16).kr(1).sr(1);
  tester.m(6).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
17312 
// Full 6x16 tile at k = 8 with the tester's qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(16).kr(1).sr(1);
  tester.m(6).n(16).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
17326 
// Full 6x16 tile at k = 8 with cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(16).kr(1).sr(1);
  tester.m(6).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
17340 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
17341 
17342 
17343 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Full 6x16 tile (m = 6, n = 16) at k = 8; all other tester settings are left
// at their defaults.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(16).kr(4).sr(1);
  tester.m(6).n(16).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17356 
// Full 6x16 tile at k = 8 with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  auto tester = GemmMicrokernelTester();
  tester.mr(6).nr(16).kr(4).sr(1);
  tester.m(6).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17370 
// At k = 8, covers every (n, m) pair with n in 1..16 and m in 1..6, using a
// single tester iteration per pair.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(4).sr(1);
      tester.m(m_val).n(n_val).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17388 
// At k = 8 and n = 16, sweeps m over 1..6 with one tester iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(4).sr(1);
    tester.m(m_val).n(16).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17404 
// At k = 8 and m = 6, sweeps n over 1..16 with one tester iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(4).sr(1);
    tester.m(6).n(n_val).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17420 
// Full 6x16 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(4).sr(1);
    tester.m(6).n(16).k(k_val);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17435 
// For every k in 1..7, covers all (n, m) pairs with n in 1..16 and m in 1..6,
// one tester iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(16).kr(4).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17455 
// Full 6x16 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(4).sr(1);
    tester.m(6).n(16).k(k_val);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17470 
// For every k in 9..15, covers all (n, m) pairs with n in 1..16 and m in 1..6,
// one tester iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(16).kr(4).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17490 
// Full 6x16 tile for k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(4).sr(1);
    tester.m(6).n(16).k(k_val);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17505 
// For k = 16, 24, ..., 80, covers all (n, m) pairs with n in 1..16 and m in
// 1..6, one tester iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(16).kr(4).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17525 
// With m = 6, sweeps n over 17..31 and, for each n, k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(4).sr(1);
      tester.m(6).n(n_val).k(k_val);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17542 
// With m = 6 and cn_stride(19), sweeps n over 17..31 and, for each n, k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(4).sr(1);
      tester.m(6).n(n_val).k(k_val);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17560 
// Sweeps n over 17..31, k over {1, 10, 19, 28, 37} and m over 1..6, with one
// tester iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(16).kr(4).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17580 
// With m = 6, runs n = 32 and n = 48 and, for each n, k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(4).sr(1);
      tester.m(6).n(n_val).k(k_val);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17597 
// With m = 6 and cn_stride(19), runs n = 32 and n = 48 and, for each n, k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(4).sr(1);
      tester.m(6).n(n_val).k(k_val);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17615 
// Runs n = 32 and n = 48, k over {1, 10, 19, 28, 37} and m over 1..6, with one
// tester iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(16).kr(4).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17635 
// Full 6x16 tile with ks(3), for k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(6).nr(16).kr(4).sr(1);
    tester.m(6).n(16).k(k_val);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17651 
// With ks(3), sweeps k over {1, 10, 19, 28, 37}, n over 1..16 and m over 1..6,
// one tester iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(16).kr(4).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17672 
// With m = 6 and ks(3), sweeps n over 17..31 and, for each n, k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(4).sr(1);
      tester.m(6).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17690 
// With m = 6 and ks(3), runs n = 32 and n = 48 and, for each n, k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(6).nr(16).kr(4).sr(1);
      tester.m(6).n(n_val).k(k_val);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17708 
// With cm_stride(19), sweeps k over {1, 10, 19, 28, 37}, n over 1..16 and m
// over 1..6, one tester iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(6).nr(16).kr(4).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(19).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17729 
// Runs the 6x16c4 NEONDOT QC8 IGEMM microkernel on a full 6x16 tile with ks(3)
// and a_offset(251) for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(6)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(251)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
17746 
// Runs the 6x16c4 NEONDOT QC8 IGEMM microkernel on a full 6x16 tile with ks(3)
// and a_offset(251), trying each zero_index mz = 0..5 for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 6; mz++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(6)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(251)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
17766 
// Runs the 6x16c4 NEONDOT QC8 IGEMM microkernel on a full 6x16 tile at k = 8
// with the tester's qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(6)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
17780 
// Runs the 6x16c4 NEONDOT QC8 IGEMM microkernel on a full 6x16 tile at k = 8
// with the tester's qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(6)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
17794 
// Runs the 6x16c4 NEONDOT QC8 IGEMM microkernel on a full 6x16 tile at k = 8
// with cm_stride(19), i.e. a stride larger than nr = 16.
TEST(QC8_IGEMM_MINMAX_FP32_6X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(6)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(6)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
17808 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
17809 
17810 
17811 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel on a full 8x8 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(8)
    .n(8)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
17824 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel on a full 8x8 tile at k = 8
// with cn_stride(11), i.e. a stride larger than nr = 8.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(8)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
17838 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel at k = 8 over every sub-tile
// m = 1..8, n = 1..8, using iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 8; m++) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
17856 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel at k = 8 with n fixed at 8 and
// m swept over 1..8, using iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 8; m++) {
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
17872 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel at k = 8 with m fixed at 8 and
// n swept over 1..8, using iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(8)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
17888 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel on a full 8x8 tile for every
// k = 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(8)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
17903 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel for k = 1..7 over every
// sub-tile m = 1..8, n = 1..8, using iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
17923 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel on a full 8x8 tile for every
// k = 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(8)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
17938 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel for k = 9..15 over every
// sub-tile m = 1..8, n = 1..8, using iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
17958 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel on a full 8x8 tile for
// k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(8)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
17973 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel for k = 16, 24, ..., 80 over
// every sub-tile m = 1..8, n = 1..8, using iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
17993 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel for n = 9..15 (above nr = 8)
// and k = 1, 10, 19, 28, 37, keeping m at the full mr = 8.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(8)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
18010 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel with cn_stride(11) for
// n = 9..15 and k = 1, 10, 19, 28, 37, keeping m at the full mr = 8.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(8)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
18028 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel for n = 9..15,
// k = 1, 10, 19, 28, 37 and every m = 1..8, using iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18048 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel for n = 16 and 24 (multiples
// of nr = 8) and k = 1, 10, 19, 28, 37, keeping m at the full mr = 8.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(8)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
18065 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel with cn_stride(11) for
// n = 16 and 24 and k = 1, 10, 19, 28, 37, keeping m at the full mr = 8.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(8)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
18083 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel for n = 16 and 24,
// k = 1, 10, 19, 28, 37 and every m = 1..8, using iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18103 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel on a full 8x8 tile with ks(3)
// for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(8)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
18119 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel with ks(3) for
// k = 1, 10, 19, 28, 37 over every sub-tile m = 1..8, n = 1..8,
// using iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18140 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel with ks(3) for n = 9..15
// (above nr = 8) and k = 1, 10, 19, 28, 37, keeping m at the full mr = 8.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(8)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
18158 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel with ks(3) for n = 16 and 24
// (multiples of nr = 8) and k = 1, 10, 19, 28, 37, keeping m at mr = 8.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(8)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
18176 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel with cm_stride(11) over every
// sub-tile m = 1..8, n = 1..8 for k = 1, 10, 19, 28, 37, using iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18197 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel on a full 8x8 tile with ks(3)
// and a_offset(331) for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(8)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(8)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(331)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
18214 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel on a full 8x8 tile with ks(3)
// and a_offset(331), trying each zero_index mz = 0..7 for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 8; mz++) {
      GemmMicrokernelTester()
        .mr(8)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(8)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(331)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
18234 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel on a full 8x8 tile at k = 8
// with the tester's qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(8)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
18248 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel on a full 8x8 tile at k = 8
// with the tester's qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(8)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
18262 
// Runs the 8x8c4 NEONDOT QC8 IGEMM microkernel on a full 8x8 tile at k = 8
// with cm_stride(11), i.e. a stride larger than nr = 8.
TEST(QC8_IGEMM_MINMAX_FP32_8X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(8)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(8)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
18276 #endif  // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
18277 
18278 
18279 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel on a full 1x4 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
18292 
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel on a full 1x4 tile at k = 8
// with cn_stride(7), i.e. a stride larger than nr = 4.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
18306 
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel at k = 8 for n = 1..4,
// using iterations(1). The m loop covers only m = 1 because mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
18324 
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel at k = 8 with n fixed at 4,
// using iterations(1). The m loop covers only m = 1 because mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
18340 
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel at k = 8 with m fixed at 1
// and n swept over 1..4, using iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
18356 
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel on a full 1x4 tile for
// every k = 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
18371 
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel for k = 1..7 and n = 1..4,
// using iterations(1). The m loop covers only m = 1 because mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18391 
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel on a full 1x4 tile for
// every k = 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
18406 
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel for k = 9..15 and n = 1..4,
// using iterations(1). The m loop covers only m = 1 because mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18426 
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel on a full 1x4 tile for
// k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
18441 
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel for k = 16, 24, ..., 80 and
// n = 1..4, using iterations(1). The m loop covers only m = 1 because mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18461 
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel for n = 5..7 (above nr = 4)
// and k = 1, 10, 19, 28, 37 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
18478 
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel with cn_stride(7) for
// n = 5..7 and k = 1, 10, 19, 28, 37 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
18496 
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel for n = 5..7 and
// k = 1, 10, 19, 28, 37, using iterations(1). The m loop covers only m = 1
// because mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18516 
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel for n = 8 and 12 (multiples
// of nr = 4) and k = 1, 10, 19, 28, 37 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
18533 
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel with cn_stride(7) for
// n = 8 and 12 and k = 1, 10, 19, 28, 37 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
18551 
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel for n = 8 and 12 and
// k = 1, 10, 19, 28, 37, using iterations(1). The m loop covers only m = 1
// because mr is 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18571 
// Runs the 1x4c2 SSE2 (ld64) QC8 IGEMM microkernel on a full 1x4 tile with
// ks(3) for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
18587 
// Runs the 1x4c2 SSE2 kernel with ks(3) for k in {1, 10, 19, 28, 37}, n in 1..4 and m = 1, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18608 
// Runs the 1x4c2 SSE2 kernel with ks(3) for n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
18626 
// Runs the 1x4c2 SSE2 kernel with ks(3) for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
18644 
// Runs the 1x4c2 SSE2 kernel with cm_stride(7) for k in {1, 10, 19, 28, 37}, n in 1..4 and m = 1, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18665 
// Runs the 1x4c2 SSE2 kernel on a full 1x4 tile with ks(3) and a_offset(43) for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
18682 
// Runs the 1x4c2 SSE2 kernel with ks(3), a_offset(43) and zero_index 0 for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_row)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
18702 
// Runs the 1x4c2 SSE2 kernel on a full 1x4 tile at k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
18716 
// Runs the 1x4c2 SSE2 kernel on a full 1x4 tile at k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
18730 
// Runs the 1x4c2 SSE2 kernel on a full 1x4 tile at k = 8 with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
18744 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
18745 
18746 
18747 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 1x4c2 SSE4.1 kernel on a full 1x4 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
18760 
// Runs the 1x4c2 SSE4.1 kernel on a full 1x4 tile at k = 8 with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
18774 
// Runs the 1x4c2 SSE4.1 kernel at k = 8 for n in 1..4 and m = 1, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
18792 
// Runs the 1x4c2 SSE4.1 kernel at k = 8, n = 4 for m = 1, one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18808 
// Runs the 1x4c2 SSE4.1 kernel at k = 8, m = 1 for n in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(cols).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18824 
// Runs the 1x4c2 SSE4.1 kernel on a full 1x4 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18839 
// Runs the 1x4c2 SSE4.1 kernel for k in 1..7, n in 1..4 and m = 1, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18859 
// Runs the 1x4c2 SSE4.1 kernel on a full 1x4 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18874 
// Runs the 1x4c2 SSE4.1 kernel for k in 9..15, n in 1..4 and m = 1, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18894 
// Runs the 1x4c2 SSE4.1 kernel on a full 1x4 tile for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
18909 
// Runs the 1x4c2 SSE4.1 kernel for k = 16, 24, ..., 80, n in 1..4 and m = 1, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18929 
// Runs the 1x4c2 SSE4.1 kernel for n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
18946 
// Runs the 1x4c2 SSE4.1 kernel with cn_stride(7) for n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
18964 
// Runs the 1x4c2 SSE4.1 kernel for n in 5..7, k in {1, 10, 19, 28, 37} and m = 1, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
18984 
// Runs the 1x4c2 SSE4.1 kernel for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
19001 
// Runs the 1x4c2 SSE4.1 kernel with cn_stride(7) for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
19019 
// Runs the 1x4c2 SSE4.1 kernel for n in {8, 12}, k in {1, 10, 19, 28, 37} and m = 1, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
19039 
// Runs the 1x4c2 SSE4.1 kernel on a full 1x4 tile with ks(3) for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
19055 
// Runs the 1x4c2 SSE4.1 kernel with ks(3) for k in {1, 10, 19, 28, 37}, n in 1..4 and m = 1, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
19076 
// Runs the 1x4c2 SSE4.1 kernel with ks(3) for n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
19094 
// Runs the 1x4c2 SSE4.1 kernel with ks(3) for n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
19112 
// Runs the 1x4c2 SSE4.1 kernel with cm_stride(7) for k in {1, 10, 19, 28, 37}, n in 1..4 and m = 1, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
19133 
// Runs the 1x4c2 SSE4.1 kernel on a full 1x4 tile with ks(3) and a_offset(43) for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(depth)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
19150 
// Runs the 1x4c2 SSE4.1 kernel with ks(3), a_offset(43) and zero_index 0 for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_row)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
19170 
// Runs the 1x4c2 SSE4.1 kernel on a full 1x4 tile at k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
19184 
// Runs the 1x4c2 SSE4.1 kernel on a full 1x4 tile at k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
19198 
// Runs the 1x4c2 SSE4.1 kernel on a full 1x4 tile at k = 8 with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
19212 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
19213 
19214 
19215 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 3x4c2 SSE2 kernel on a full 3x4 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
19228 
// Runs the 3x4c2 SSE2 kernel on a full 3x4 tile at k = 8 with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
19242 
// Runs the 3x4c2 SSE2 kernel at k = 8 for n in 1..4 and m in 1..3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
19260 
// Runs the 3x4c2 SSE2 kernel at k = 8, n = 4 for m in 1..3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
19276 
// Runs the 3x4c2 SSE2 kernel at k = 8, m = 3 for n in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(cols).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
19292 
// Runs the 3x4c2 SSE2 kernel on a full 3x4 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
19307 
// Runs the 3x4c2 SSE2 kernel for k in 1..7, n in 1..4 and m in 1..3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
19327 
// Runs the 3x4c2 SSE2 kernel on a full 3x4 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
19342 
// Runs the 3x4c2 SSE2 kernel for k in 9..15, n in 1..4 and m in 1..3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
19362 
// Runs the 3x4c2 SSE2 kernel on a full 3x4 tile for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(depth)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
19377 
// Runs the 3x4c2 SSE2 kernel for k = 16, 24, ..., 80, n in 1..4 and m in 1..3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
19397 
// Runs the 3x4c2 SSE2 kernel with m = 3 for n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
19414 
// Runs the 3x4c2 SSE2 kernel with m = 3 and cn_stride(7) for n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
19432 
// Runs the 3x4c2 SSE2 kernel for n in 5..7, k in {1, 10, 19, 28, 37} and m in 1..3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
19452 
// 3x4c2 SSE2 LD64 ukernel: m = 3, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19469 
// 3x4c2 SSE2 LD64 ukernel: m = 3, n in {8, 12}, k in {1, 10, 19, 28, 37},
// with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19487 
// 3x4c2 SSE2 LD64 ukernel: every m in 1..3 for n in {8, 12} and
// k in {1, 10, 19, 28, 37}; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19507 
// 3x4c2 SSE2 LD64 ukernel: full 3x4 tile with ks set to 3,
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
19523 
// 3x4c2 SSE2 LD64 ukernel: ks set to 3, every (m, n) with m in 1..3 and
// n in 1..4, k in {1, 10, 19, 28, 37}; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19544 
// 3x4c2 SSE2 LD64 ukernel: ks set to 3, m = 3, n in {5, 6, 7},
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19562 
// 3x4c2 SSE2 LD64 ukernel: ks set to 3, m = 3, n in {8, 12},
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19580 
// 3x4c2 SSE2 LD64 ukernel: cm_stride set to 7, every (m, n) with m in 1..3
// and n in 1..4, k in {1, 10, 19, 28, 37}; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19601 
// 3x4c2 SSE2 LD64 ukernel: full 3x4 tile with ks set to 3 and a_offset set
// to 127, k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(depth)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
19618 
// 3x4c2 SSE2 LD64 ukernel: full 3x4 tile with ks set to 3 and a_offset set
// to 127, trying zero_index = 0, 1, 2 for each k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(4).k(depth)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_row)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19638 
// 3x4c2 SSE2 LD64 ukernel: full 3x4 tile, k = 8, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
19652 
// 3x4c2 SSE2 LD64 ukernel: full 3x4 tile, k = 8, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
19666 
// 3x4c2 SSE2 LD64 ukernel: full 3x4 tile, k = 8, with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
19680 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
19681 
19682 
19683 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4c2 SSE4.1 LD64 ukernel: full 4x4 tile with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
19696 
// 4x4c2 SSE4.1 LD64 ukernel: full 4x4 tile, k = 8, with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
19710 
// 4x4c2 SSE4.1 LD64 ukernel: k = 8, every (m, n) with m in 1..4 and n in 1..4;
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19728 
// 4x4c2 SSE4.1 LD64 ukernel: k = 8, n = 4, every m in 1..4;
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19744 
// 4x4c2 SSE4.1 LD64 ukernel: k = 8, m = 4, every n in 1..4;
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19760 
// 4x4c2 SSE4.1 LD64 ukernel: full 4x4 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19775 
// 4x4c2 SSE4.1 LD64 ukernel: every k in 1..7 crossed with every (m, n),
// m in 1..4 and n in 1..4; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19795 
// 4x4c2 SSE4.1 LD64 ukernel: full 4x4 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19810 
// 4x4c2 SSE4.1 LD64 ukernel: every k in 9..15 crossed with every (m, n),
// m in 1..4 and n in 1..4; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19830 
// 4x4c2 SSE4.1 LD64 ukernel: full 4x4 tile for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19845 
// 4x4c2 SSE4.1 LD64 ukernel: k = 16, 24, ..., 80 crossed with every (m, n),
// m in 1..4 and n in 1..4; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19865 
// 4x4c2 SSE4.1 LD64 ukernel: m = 4, n in {5, 6, 7}, k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19882 
// 4x4c2 SSE4.1 LD64 ukernel: m = 4, n in {5, 6, 7}, k in {1, 10, 19, 28, 37},
// with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19900 
// 4x4c2 SSE4.1 LD64 ukernel: every m in 1..4 for n in {5, 6, 7} and
// k in {1, 10, 19, 28, 37}; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19920 
// 4x4c2 SSE4.1 LD64 ukernel: m = 4, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19937 
// 4x4c2 SSE4.1 LD64 ukernel: m = 4, n in {8, 12}, k in {1, 10, 19, 28, 37},
// with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19955 
// 4x4c2 SSE4.1 LD64 ukernel: every m in 1..4 for n in {8, 12} and
// k in {1, 10, 19, 28, 37}; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19975 
// 4x4c2 SSE4.1 LD64 ukernel: full 4x4 tile with ks set to 3,
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(depth)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
19991 
// 4x4c2 SSE4.1 LD64 ukernel: ks set to 3, every (m, n) with m in 1..4 and
// n in 1..4, k in {1, 10, 19, 28, 37}; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
20012 
// 4x4c2 SSE4.1 LD64 ukernel: ks set to 3, m = 4, n in {5, 6, 7},
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
20030 
// 4x4c2 SSE4.1 LD64 ukernel: ks set to 3, m = 4, n in {8, 12},
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
20048 
// 4x4c2 SSE4.1 LD64 ukernel: cm_stride set to 7, every (m, n) with m in 1..4
// and n in 1..4, k in {1, 10, 19, 28, 37}; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
20069 
// 4x4c2 SSE4.1 LD64 ukernel: full 4x4 tile with ks set to 3 and a_offset set
// to 163, k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(depth)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20086 
// 4x4c2 SSE4.1 LD64 ukernel: full 4x4 tile with ks set to 3 and a_offset set
// to 163, trying zero_index = 0, 1, 2, 3 for each k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(depth)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_row)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
20106 
// 4x4c2 SSE4.1 LD64 ukernel: full 4x4 tile, k = 8, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
20120 
// 4x4c2 SSE4.1 LD64 ukernel: full 4x4 tile, k = 8, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
20134 
// 4x4c2 SSE4.1 LD64 ukernel: full 4x4 tile, k = 8, with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
20148 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
20149 
20150 
20151 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c2 AVX LD64 ukernel: full 1x4 tile with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
20164 
// 1x4c2 AVX LD64 ukernel: full 1x4 tile, k = 8, with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
20178 
// 1x4c2 AVX LD64 ukernel: k = 8, every n in 1..4 with m = 1 (the m loop has a
// single value); one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
20196 
// 1x4c2 AVX LD64 ukernel: k = 8, n = 4, m looping over its single value 1;
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20212 
// 1x4c2 AVX LD64 ukernel: k = 8, m = 1, every n in 1..4;
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20228 
// 1x4c2 AVX LD64 ukernel: full 1x4 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20243 
// 1x4c2 AVX LD64 ukernel: every k in 1..7 crossed with every n in 1..4 at
// m = 1 (the m loop has a single value); one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
20263 
// 1x4c2 AVX LD64 ukernel: full 1x4 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20278 
// 1x4c2 AVX LD64 ukernel: every k in 9..15 crossed with every n in 1..4 at
// m = 1 (the m loop has a single value); one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
20298 
// 1x4c2 AVX LD64 ukernel: full 1x4 tile for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20313 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, k_div_8_subtile) {
  // For k = 16, 24, ..., 80, sweeps n over 1..4 and m over 1..1 with a single iteration each.
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20333 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4) {
  // Sweeps n over 5..7 (larger than nr = 4) and k over 1, 10, ..., 37 with m = 1.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20350 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_strided_cn) {
  // Same n (5..7) and k (1, 10, ..., 37) sweep as n_gt_4, with cn_stride set to 7.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth).cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20368 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_subtile) {
  // Sweeps n over 5..7, k over 1, 10, ..., 37 and m over 1..1, one iteration per combination.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20388 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4) {
  // Sweeps n over 8 and 12 (multiples of nr = 4) and k over 1, 10, ..., 37 with m = 1.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20405 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_strided_cn) {
  // Same n (8, 12) and k (1, 10, ..., 37) sweep as n_div_4, with cn_stride set to 7.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth).cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20423 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_subtile) {
  // Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over 1..1, one iteration per combination.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20443 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, small_kernel) {
  // Runs m = 1, n = 4 with ks = 3 for k = 1, 10, ..., 37.
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth).ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20459 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, small_kernel_subtile) {
  // With ks = 3, sweeps k over 1, 10, ..., 37, n over 1..4 and m over 1..1, one iteration each.
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20480 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_gt_4_small_kernel) {
  // With ks = 3 and m = 1, sweeps n over 5..7 and k over 1, 10, ..., 37.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth).ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20498 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, n_div_4_small_kernel) {
  // With ks = 3 and m = 1, sweeps n over 8 and 12 and k over 1, 10, ..., 37.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(cols).k(depth).ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20516 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cm_subtile) {
  // With cm_stride = 7, sweeps k over 1, 10, ..., 37, n over 1..4 and m over 1..1, one iteration each.
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).cm_stride(7).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20537 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, a_offset) {
  // Runs m = 1, n = 4 with ks = 3 and a_offset = 43 for k = 1, 10, ..., 37.
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth).ks(3).a_offset(43);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20554 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, zero) {
  // With ks = 3 and a_offset = 43, sweeps k over 1, 10, ..., 37 and zero_index over [0, 1).
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(4).k(depth).ks(3).a_offset(43).zero_index(zero_row);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20574 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, qmin) {
  // Single run at m = 1, n = 4, k = 8 with qmin set to 128.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8).qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
20588 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, qmax) {
  // Single run at m = 1, n = 4, k = 8 with qmax set to 128.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8).qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
20602 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD64, strided_cm) {
  // Single run at m = 1, n = 4, k = 8 with cm_stride set to 7.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
20616 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
20617 
20618 
20619 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8) {
  // Single run of the 3x4c2 AVX ld64 kernel at m = 3, n = 4, k = 8.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
20632 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cn) {
  // Single run at m = 3, n = 4, k = 8 with cn_stride set to 7.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
20646 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile) {
  // At k = 8, sweeps n over 1..4 and m over 1..3 with a single iteration each.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(rows).n(cols).k(8).iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20664 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_m) {
  // At n = 4, k = 8, sweeps m over 1..3 with a single iteration each.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(rows).n(4).k(8).iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20680 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_n) {
  // At m = 3, k = 8, sweeps n over 1..4 with a single iteration each.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(cols).k(8).iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20696 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8) {
  // Runs m = 3, n = 4 for every k in [1, 8).
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20711 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8_subtile) {
  // For every k in [1, 8), sweeps n over 1..4 and m over 1..3 with a single iteration each.
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20731 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8) {
  // Runs m = 3, n = 4 for every k in [9, 16).
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20746 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8_subtile) {
  // For every k in [9, 16), sweeps n over 1..4 and m over 1..3 with a single iteration each.
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20766 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8) {
  // Runs m = 3, n = 4 for k = 16, 24, ..., 80 (step of 8).
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20781 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8_subtile) {
  // For k = 16, 24, ..., 80, sweeps n over 1..4 and m over 1..3 with a single iteration each.
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20801 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4) {
  // Sweeps n over 5..7 (larger than nr = 4) and k over 1, 10, ..., 37 with m = 3.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20818 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_strided_cn) {
  // Same n (5..7) and k (1, 10, ..., 37) sweep as n_gt_4, with cn_stride set to 7.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth).cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20836 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_subtile) {
  // Sweeps n over 5..7, k over 1, 10, ..., 37 and m over 1..3, one iteration per combination.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20856 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4) {
  // Sweeps n over 8 and 12 (multiples of nr = 4) and k over 1, 10, ..., 37 with m = 3.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20873 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_strided_cn) {
  // Same n (8, 12) and k (1, 10, ..., 37) sweep as n_div_4, with cn_stride set to 7.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth).cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20891 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_subtile) {
  // Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over 1..3, one iteration per combination.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20911 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel) {
  // Runs m = 3, n = 4 with ks = 3 for k = 1, 10, ..., 37.
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(depth).ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
20927 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel_subtile) {
  // With ks = 3, sweeps k over 1, 10, ..., 37, n over 1..4 and m over 1..3, one iteration each.
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
20948 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_small_kernel) {
  // With ks = 3 and m = 3, sweeps n over 5..7 and k over 1, 10, ..., 37.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth).ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20966 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_small_kernel) {
  // With ks = 3 and m = 3, sweeps n over 8 and 12 and k over 1, 10, ..., 37.
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth).ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
20984 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm_subtile) {
  // With cm_stride = 7, sweeps k over 1, 10, ..., 37, n over 1..4 and m over 1..3, one iteration each.
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).cm_stride(7).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21005 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, a_offset) {
  // Runs m = 3, n = 4 with ks = 3 and a_offset = 127 for k = 1, 10, ..., 37.
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(depth).ks(3).a_offset(127);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
21022 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, zero) {
  // With ks = 3 and a_offset = 127, sweeps k over 1, 10, ..., 37 and zero_index over [0, 3).
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(4).k(depth).ks(3).a_offset(127).zero_index(zero_row);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
21042 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmin) {
  // Single run at m = 3, n = 4, k = 8 with qmin set to 128.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21056 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmax) {
  // Single run at m = 3, n = 4, k = 8 with qmax set to 128.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21070 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm) {
  // Single run at m = 3, n = 4, k = 8 with cm_stride set to 7.
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
21084 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21085 
21086 
21087 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8) {
  // Single run of the 1x4c2 SSE2 ld128 kernel at m = 1, n = 4, k = 8.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
21100 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cn) {
  // Single run at m = 1, n = 4, k = 8 with cn_stride set to 7.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8).cn_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
21114 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile) {
  // At k = 8, sweeps n over 1..4 and m over 1..1 with a single iteration each.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(rows).n(cols).k(8).iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
21132 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  // At n = 4, k = 8, sweeps m over 1..1 with a single iteration.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(rows).n(4).k(8).iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
21148 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  // At m = 1, k = 8, sweeps n over 1..4 with a single iteration each.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(cols).k(8).iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
21164 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8) {
  // Runs m = 1, n = 4 for every k in [1, 8).
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
21179 
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8_subtile) {
  // For every k in [1, 8), sweeps n over 1..4 and m over 1..1 with a single iteration each.
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
21199 
// 1x4c2 SSE2 ld128 kernel: m = 1, n = 4, k swept over [9, 16).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21214 
// 1x4c2 SSE2 ld128 kernel: k swept over [9, 16), n over [1, 4], m over [1, 1],
// with iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21234 
// 1x4c2 SSE2 ld128 kernel: m = 1, n = 4, k swept over 16..80 in steps of 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21249 
// 1x4c2 SSE2 ld128 kernel: k swept over 16..80 in steps of 8, n over [1, 4],
// m over [1, 1], with iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21269 
// 1x4c2 SSE2 ld128 kernel: m = 1, n swept over [5, 8), k over 1..40 in steps
// of 9.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21286 
// 1x4c2 SSE2 ld128 kernel: m = 1, n swept over [5, 8), k over 1..40 in steps
// of 9, with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21304 
// 1x4c2 SSE2 ld128 kernel: n swept over [5, 8), k over 1..40 in steps of 9,
// m over [1, 1], with iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21324 
// 1x4c2 SSE2 ld128 kernel: m = 1, n swept over 8..12 in steps of 4, k over
// 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21341 
// 1x4c2 SSE2 ld128 kernel: m = 1, n swept over 8..12 in steps of 4, k over
// 1..40 in steps of 9, with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21359 
// 1x4c2 SSE2 ld128 kernel: n swept over 8..12 in steps of 4, k over 1..40 in
// steps of 9, m over [1, 1], with iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21379 
// 1x4c2 SSE2 ld128 kernel: m = 1, n = 4, ks(3), k swept over 1..40 in steps
// of 9.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21395 
// 1x4c2 SSE2 ld128 kernel: ks(3), k swept over 1..40 in steps of 9, n over
// [1, 4], m over [1, 1], with iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21416 
// 1x4c2 SSE2 ld128 kernel: m = 1, ks(3), n swept over [5, 8), k over 1..40 in
// steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21434 
// 1x4c2 SSE2 ld128 kernel: m = 1, ks(3), n swept over 8..12 in steps of 4,
// k over 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21452 
// 1x4c2 SSE2 ld128 kernel: cm_stride(7), k swept over 1..40 in steps of 9,
// n over [1, 4], m over [1, 1], with iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21473 
// 1x4c2 SSE2 ld128 kernel: m = 1, n = 4, ks(3), a_offset(43), k swept over
// 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21490 
// 1x4c2 SSE2 ld128 kernel: m = 1, n = 4, ks(3), a_offset(43); k swept over
// 1..40 in steps of 9 and zero_index over [0, 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(4).k(k_dim)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21510 
// 1x4c2 SSE2 ld128 kernel: single run at m = 1, n = 4, k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
21524 
// 1x4c2 SSE2 ld128 kernel: single run at m = 1, n = 4, k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
21538 
// 1x4c2 SSE2 ld128 kernel: single run at m = 1, n = 4, k = 8 with
// cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
21552 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
21553 
21554 
21555 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c2 SSE2 ld128 kernel: single run at m = 3, n = 4, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
21568 
// 3x4c2 SSE2 ld128 kernel: single run at m = 3, n = 4, k = 8 with
// cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
21582 
// 3x4c2 SSE2 ld128 kernel: k = 8, n swept over [1, 4], m over [1, 3], with
// iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21600 
// 3x4c2 SSE2 ld128 kernel: k = 8, n = 4, m swept over [1, 3], with
// iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21616 
// 3x4c2 SSE2 ld128 kernel: k = 8, m = 3, n swept over [1, 4], with
// iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21632 
// 3x4c2 SSE2 ld128 kernel: m = 3, n = 4, k swept over [1, 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21647 
// 3x4c2 SSE2 ld128 kernel: k swept over [1, 8), n over [1, 4], m over [1, 3],
// with iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21667 
// 3x4c2 SSE2 ld128 kernel: m = 3, n = 4, k swept over [9, 16).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21682 
// 3x4c2 SSE2 ld128 kernel: k swept over [9, 16), n over [1, 4], m over [1, 3],
// with iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21702 
// 3x4c2 SSE2 ld128 kernel: m = 3, n = 4, k swept over 16..80 in steps of 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21717 
// 3x4c2 SSE2 ld128 kernel: k swept over 16..80 in steps of 8, n over [1, 4],
// m over [1, 3], with iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21737 
// 3x4c2 SSE2 ld128 kernel: m = 3, n swept over [5, 8), k over 1..40 in steps
// of 9.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21754 
// 3x4c2 SSE2 ld128 kernel: m = 3, n swept over [5, 8), k over 1..40 in steps
// of 9, with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21772 
// 3x4c2 SSE2 ld128 kernel: n swept over [5, 8), k over 1..40 in steps of 9,
// m over [1, 3], with iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21792 
// 3x4c2 SSE2 ld128 kernel: m = 3, n swept over 8..12 in steps of 4, k over
// 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21809 
// 3x4c2 SSE2 ld128 kernel: m = 3, n swept over 8..12 in steps of 4, k over
// 1..40 in steps of 9, with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21827 
// 3x4c2 SSE2 ld128 kernel: n swept over 8..12 in steps of 4, k over 1..40 in
// steps of 9, m over [1, 3], with iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21847 
// 3x4c2 SSE2 ld128 kernel: m = 3, n = 4, ks(3), k swept over 1..40 in steps
// of 9.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21863 
// 3x4c2 SSE2 ld128 kernel: ks(3), k swept over 1..40 in steps of 9, n over
// [1, 4], m over [1, 3], with iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21884 
// 3x4c2 SSE2 ld128 kernel: m = 3, ks(3), n swept over [5, 8), k over 1..40 in
// steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21902 
// 3x4c2 SSE2 ld128 kernel: m = 3, ks(3), n swept over 8..12 in steps of 4,
// k over 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21920 
// 3x4c2 SSE2 ld128 kernel: cm_stride(7), k swept over 1..40 in steps of 9,
// n over [1, 4], m over [1, 3], with iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21941 
// 3x4c2 SSE2 ld128 kernel: m = 3, n = 4, ks(3), a_offset(127), k swept over
// 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
21958 
// 3x4c2 SSE2 ld128 kernel: m = 3, n = 4, ks(3), a_offset(127); k swept over
// 1..40 in steps of 9 and zero_index over [0, 3).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(4).k(k_dim)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21978 
// 3x4c2 SSE2 ld128 kernel: single run at m = 3, n = 4, k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
21992 
// 3x4c2 SSE2 ld128 kernel: single run at m = 3, n = 4, k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
22006 
// 3x4c2 SSE2 ld128 kernel: single run at m = 3, n = 4, k = 8 with
// cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
22020 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22021 
22022 
22023 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c2 SSE4.1 ld128 kernel: single run at m = 3, n = 4, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
22036 
// 3x4c2 SSE4.1 ld128 kernel: single run at m = 3, n = 4, k = 8 with
// cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
22050 
// k = 8 for every n in [1, 4] crossed with every m in [1, 3]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22068 
// k = 8 and n = 4 while m sweeps 1..3; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
22084 
// k = 8 and m = 3 while n sweeps 1..4; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
22100 
// m = 3, n = 4 for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
22115 
// Every k in [1, 7] crossed with n in [1, 4] and m in [1, 3]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22135 
// m = 3, n = 4 for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
22150 
// Every k in [9, 15] crossed with n in [1, 4] and m in [1, 3]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22170 
// m = 3, n = 4 for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
22185 
// k = 16, 24, ..., 80 crossed with n in [1, 4] and m in [1, 3]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22205 
// m = 3 with n in [5, 7], sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22222 
// m = 3 with n in [5, 7] and cn_stride = 7, sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22240 
// n in [5, 7], k = 1, 10, ..., 37 and m in [1, 3]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22260 
// m = 3 with n = 8 and n = 12, sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22277 
// m = 3 with n = 8 and n = 12 and cn_stride = 7, sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22295 
// n = 8 and n = 12, k = 1, 10, ..., 37 and m in [1, 3]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22315 
// m = 3, n = 4 with ks = 3, sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_dim)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
22331 
// ks = 3 with k = 1, 10, ..., 37 crossed with n in [1, 4] and m in [1, 3]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22352 
// m = 3, ks = 3 with n in [5, 7], sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22370 
// m = 3, ks = 3 with n = 8 and n = 12, sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22388 
// cm_stride = 7 with k = 1, 10, ..., 37 crossed with n in [1, 4] and m in [1, 3]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22409 
// m = 3, n = 4, ks = 3 and a_offset = 127, sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_dim)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
22426 
// ks = 3 and a_offset = 127 with zero_index sweeping 0..2, sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t z_idx = 0; z_idx < 3; ++z_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .a_offset(127)
        .zero_index(z_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22446 
// m = 3, n = 4, k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
22460 
// m = 3, n = 4, k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
22474 
// m = 3, n = 4, k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
22488 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22489 
22490 
22491 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 4x4c2 SSE2 ukernel once with m = 4, n = 4, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
22504 
// m = 4, n = 4, k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
22518 
// k = 8 for every n in [1, 4] crossed with every m in [1, 4]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22536 
// k = 8 and n = 4 while m sweeps 1..4; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22552 
// k = 8 and m = 4 while n sweeps 1..4; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22568 
// m = 4, n = 4 for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22583 
// Every k in [1, 7] crossed with n in [1, 4] and m in [1, 4]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22603 
// m = 4, n = 4 for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22618 
// Every k in [9, 15] crossed with n in [1, 4] and m in [1, 4]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22638 
// m = 4, n = 4 for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22653 
// k = 16, 24, ..., 80 crossed with n in [1, 4] and m in [1, 4]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22673 
// m = 4 with n in [5, 7], sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22690 
// m = 4 with n in [5, 7] and cn_stride = 7, sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22708 
// n in [5, 7], k = 1, 10, ..., 37 and m in [1, 4]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22728 
// m = 4 with n = 8 and n = 12, sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22745 
// m = 4 with n = 8 and n = 12 and cn_stride = 7, sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22763 
// n = 8 and n = 12, k = 1, 10, ..., 37 and m in [1, 4]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22783 
// m = 4, n = 4 with ks = 3, sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22799 
// ks = 3 with k = 1, 10, ..., 37 crossed with n in [1, 4] and m in [1, 4]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22820 
// m = 4, ks = 3 with n in [5, 7], sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22838 
// m = 4, ks = 3 with n = 8 and n = 12, sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22856 
// cm_stride = 7 with k = 1, 10, ..., 37 crossed with n in [1, 4] and m in [1, 4]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22877 
// m = 4, n = 4, ks = 3 and a_offset = 163, sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_dim)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22894 
// ks = 3 and a_offset = 163 with zero_index sweeping 0..3, sampling k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t z_idx = 0; z_idx < 4; ++z_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163)
        .zero_index(z_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22914 
// Full 4x4 tile at k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
22928 
// Full 4x4 tile at k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
22942 
// Full 4x4 tile at k = 8 with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
22956 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
22957 
22958 
22959 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 4x4 tile at k = 8 with no extra options.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
22972 
// Full 4x4 tile at k = 8 with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
22986 
// k = 8 with every m x n combination up to 4x4 (n outer, m inner);
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23004 
// k = 8, n = 4, with m stepping through 1..4; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23020 
// k = 8, m = 4, with n stepping through 1..4; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(n_val).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23036 
// Full 4x4 tile for each k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23051 
// Each k in 1..7 crossed with every m x n combination up to 4x4;
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23071 
// Full 4x4 tile for each k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23086 
// Each k in 9..15 crossed with every m x n combination up to 4x4;
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23106 
// Full 4x4 tile for k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23121 
// k = 16, 24, ..., 80 crossed with every m x n combination up to 4x4;
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23141 
// m = 4 with n in 5..7, each crossed with k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_val).k(k_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23158 
// m = 4 with n in 5..7 and cn_stride(7), each crossed with
// k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23176 
// n in 5..7, k in 1, 10, 19, 28, 37, m in 1..4 (nested in that order);
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23196 
// m = 4 with n in {8, 12}, each crossed with k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_val).k(k_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23213 
// m = 4 with n in {8, 12} and cn_stride(7), each crossed with
// k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23231 
// n in {8, 12}, k in 1, 10, 19, 28, 37, m in 1..4 (nested in that order);
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23251 
// Full 4x4 tile with ks(3), for k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_val)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23267 
// ks(3) with k in 1, 10, 19, 28, 37 crossed with every m x n combination
// up to 4x4; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23288 
// m = 4 and ks(3) with n in 5..7, each crossed with k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23306 
// m = 4 and ks(3) with n in {8, 12}, each crossed with
// k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23324 
// cm_stride(7) with k in 1, 10, 19, 28, 37 crossed with every m x n
// combination up to 4x4; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23345 
// Full 4x4 tile with ks(3) and a_offset(163), for k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_val)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23362 
// Full 4x4 tile with ks(3) and a_offset(163); zero_index takes each value
// 0..3 for every k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_val)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23382 
// Full 4x4 tile at k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
23396 
// Full 4x4 tile at k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
23410 
// Full 4x4 tile at k = 8 with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
23424 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23425 
23426 
23427 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile at k = 8 with no extra options.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
23440 
// Full 3x4 tile at k = 8 with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
23454 
// k = 8 with every m x n combination up to 3x4 (n outer, m inner);
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23472 
// k = 8, n = 4, with m stepping through 1..3; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23488 
// k = 8, m = 3, with n stepping through 1..4; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(n_val).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23504 
// Full 3x4 tile for each k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23519 
// Each k in 1..7 crossed with every m x n combination up to 3x4;
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23539 
// Full 3x4 tile for each k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23554 
// Each k in 9..15 crossed with every m x n combination up to 3x4;
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23574 
// Full 3x4 tile for k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23589 
// k = 16, 24, ..., 80 crossed with every m x n combination up to 3x4;
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23609 
// m = 3 with n in 5..7, each crossed with k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23626 
// m = 3 with n in 5..7 and cn_stride(7), each crossed with
// k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23644 
// n in 5..7, k in 1, 10, 19, 28, 37, m in 1..3 (nested in that order);
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23664 
// m = 3 with n in {8, 12}, each crossed with k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23681 
// m = 3 with n in {8, 12} and cn_stride(7), each crossed with
// k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23699 
// n in {8, 12}, k in 1, 10, 19, 28, 37, m in 1..3 (nested in that order);
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23719 
// Full 3x4 tile with ks(3), for k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23735 
// ks(3) with k in 1, 10, 19, 28, 37 crossed with every m x n combination
// up to 3x4; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23756 
// m = 3 and ks(3) with n in 5..7, each crossed with k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23774 
// ks(3) with n = 8 and 12 (multiples of nr = 4) and k = 1, 10, 19, 28, 37; m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23792 
// cm_stride(7) over every m in [1, 3] and n in [1, 4], for k = 1, 10, 19, 28, 37;
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23813 
// Full 3x4 tile with ks(3) and a_offset(127); k takes the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23830 
// Same setup as the a_offset case, additionally passing zero_index(mz) for
// each mz in [0, 3).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23850 
// Full 3x4 tile, k = 8, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23864 
// Full 3x4 tile, k = 8, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23878 
// Full 3x4 tile, k = 8, with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23892 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
23893 
23894 
23895 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 4x4 tile with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23908 
// Full 4x4 tile, k = 8, with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23922 
// k = 8 for every m in [1, 4] and n in [1, 4]; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23940 
// k = 8, n = 4, for every m in [1, 4]; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23956 
// k = 8, m = 4, for every n in [1, 4]; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(n).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23972 
// Full 4x4 tile for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23987 
// Every k in [1, 7] crossed with every m in [1, 4] and n in [1, 4];
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24007 
// Full 4x4 tile for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24022 
// Every k in [9, 15] crossed with every m in [1, 4] and n in [1, 4];
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24042 
// Full 4x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24057 
// k = 16, 24, ..., 80 crossed with every m in [1, 4] and n in [1, 4];
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24077 
// n = 5, 6, 7 (more than nr = 4) with k = 1, 10, 19, 28, 37; m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24094 
// n = 5, 6, 7 with k = 1, 10, 19, 28, 37 and cn_stride(7); m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24112 
// n = 5, 6, 7 with k = 1, 10, 19, 28, 37 and every m in [1, 4];
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24132 
// n = 8 and 12 (multiples of nr = 4) with k = 1, 10, 19, 28, 37; m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24149 
// n = 8 and 12 with k = 1, 10, 19, 28, 37 and cn_stride(7); m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24167 
// n = 8 and 12 with k = 1, 10, 19, 28, 37 and every m in [1, 4];
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24187 
// Full 4x4 tile with ks(3); k takes the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24203 
// ks(3) over every m in [1, 4] and n in [1, 4], for k = 1, 10, 19, 28, 37;
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24224 
// ks(3) with n = 5, 6, 7 (more than nr = 4) and k = 1, 10, 19, 28, 37; m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24242 
// ks(3) with n = 8 and 12 (multiples of nr = 4) and k = 1, 10, 19, 28, 37; m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24260 
// cm_stride(7) over every m in [1, 4] and n in [1, 4], for k = 1, 10, 19, 28, 37;
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24281 
// Full 4x4 tile with ks(3) and a_offset(163); k takes the values 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24298 
// Same setup as the a_offset case, additionally passing zero_index(mz) for
// each mz in [0, 4).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24318 
// Full 4x4 tile, k = 8, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
24332 
// Full 4x4 tile, k = 8, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
24346 
// Full 4x4 tile, k = 8, with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
24360 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
24361 
24362 
24363 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 4x4 tile with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
24376 
// Full 4x4 tile, k = 8, with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
24390 
// k = 8 for every m in [1, 4] and n in [1, 4]; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24408 
// k = 8, n = 4, for every m in [1, 4]; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24424 
// k = 8, m = 4, for every n in [1, 4]; a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(n).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24440 
// Full 4x4 tile for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24455 
// Every k in [1, 7] crossed with every m in [1, 4] and n in [1, 4];
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24475 
// Full 4x4 tile for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24490 
// Every k in [9, 15] crossed with every m in [1, 4] and n in [1, 4];
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24510 
// Full 4x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
24525 
// k = 16, 24, ..., 80 crossed with every m in [1, 4] and n in [1, 4];
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24545 
// n = 5, 6, 7 (more than nr = 4) with k = 1, 10, 19, 28, 37; m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24562 
// n = 5, 6, 7 with k = 1, 10, 19, 28, 37 and cn_stride(7); m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24580 
// n = 5, 6, 7 with k = 1, 10, 19, 28, 37 and every m in [1, 4];
// a single iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24600 
// n = 8 and 12 (multiples of nr = 4) with k = 1, 10, 19, 28, 37; m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24617 
// n = 8 and 12 with k = 1, 10, 19, 28, 37 and cn_stride(7); m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
24635 
// n_div_4_subtile: n = 8 and 12, k = 1, 10, 19, 28, 37 and m = 1..4 with
// mr=4, nr=4, kr=2, sr=1; iterations(1) is set for every configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24655 
// small_kernel: k = 1, 10, 19, 28, 37 with m = 4, n = 4 and ks(3)
// (mr=4, nr=4, kr=2, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24671 
// small_kernel_subtile: k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..4 with
// ks(3) and iterations(1) (mr=4, nr=4, kr=2, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24692 
// n_gt_4_small_kernel: n = 5..7, k = 1, 10, 19, 28, 37, m = 4, with ks(3)
// (mr=4, nr=4, kr=2, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24710 
// n_div_4_small_kernel: n = 8 and 12, k = 1, 10, 19, 28, 37, m = 4, with
// ks(3) (mr=4, nr=4, kr=2, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24728 
// strided_cm_subtile: k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..4 with
// cm_stride(7) and iterations(1) (mr=4, nr=4, kr=2, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24749 
// a_offset: k = 1, 10, 19, 28, 37 with m = 4, n = 4, ks(3) and
// a_offset(163) (mr=4, nr=4, kr=2, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24766 
// zero: k = 1, 10, 19, 28, 37 and zero_index = 0..3 with m = 4, n = 4,
// ks(3) and a_offset(163) (mr=4, nr=4, kr=2, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24786 
// qmin: single m = 4, n = 4, k = 8 run with qmin(128)
// (mr=4, nr=4, kr=2, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24800 
// qmax: single m = 4, n = 4, k = 8 run with qmax(128)
// (mr=4, nr=4, kr=2, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24814 
// strided_cm: single m = 4, n = 4, k = 8 run with cm_stride(7)
// (mr=4, nr=4, kr=2, sr=1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24828 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
24829 
24830 
24831 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single m = 1, n = 4, k = 8 run (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24844 
// strided_cn: single m = 1, n = 4, k = 8 run with cn_stride(7)
// (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24858 
// k_eq_8_subtile: n = 1..4 and m = 1 (single-pass loop) at k = 8 with
// iterations(1) (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24876 
// k_eq_8_subtile_m: m = 1 (single-pass loop) with n = 4, k = 8 and
// iterations(1) (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24892 
// k_eq_8_subtile_n: n = 1..4 with m = 1, k = 8 and iterations(1)
// (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24908 
// k_lt_8: k = 1..7 with m = 1, n = 4 (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24923 
// k_lt_8_subtile: k = 1..7, n = 1..4 and m = 1 (single-pass loop) with
// iterations(1) (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24943 
// k_gt_8: k = 9..15 with m = 1, n = 4 (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24958 
// k_gt_8_subtile: k = 9..15, n = 1..4 and m = 1 (single-pass loop) with
// iterations(1) (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24978 
// k_div_8: k = 16, 24, ..., 80 with m = 1, n = 4
// (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24993 
// k_div_8_subtile: k = 16, 24, ..., 80, n = 1..4 and m = 1 (single-pass
// loop) with iterations(1) (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25013 
// n_gt_4: n = 5..7 and k = 1, 10, 19, 28, 37 with m = 1
// (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25030 
// n_gt_4_strided_cn: n = 5..7 and k = 1, 10, 19, 28, 37 with m = 1 and
// cn_stride(7) (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25048 
// n_gt_4_subtile: n = 5..7, k = 1, 10, 19, 28, 37 and m = 1 (single-pass
// loop) with iterations(1) (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25068 
// n_div_4: n = 8 and 12, k = 1, 10, 19, 28, 37 with m = 1
// (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25085 
// n_div_4_strided_cn: n = 8 and 12, k = 1, 10, 19, 28, 37 with m = 1 and
// cn_stride(7) (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25103 
// n_div_4_subtile: n = 8 and 12, k = 1, 10, 19, 28, 37 and m = 1
// (single-pass loop) with iterations(1) (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25123 
// small_kernel: k = 1, 10, 19, 28, 37 with m = 1, n = 4 and ks(3)
// (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25139 
// small_kernel_subtile: k = 1, 10, 19, 28, 37, n = 1..4 and m = 1
// (single-pass loop) with ks(3) and iterations(1)
// (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25160 
// n_gt_4_small_kernel: n = 5..7 and k = 1, 10, 19, 28, 37 with m = 1 and
// ks(3) (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25178 
// n_div_4_small_kernel: n = 8 and 12, k = 1, 10, 19, 28, 37 with m = 1
// and ks(3) (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25196 
// strided_cm_subtile: k = 1, 10, 19, 28, 37, n = 1..4 and m = 1
// (single-pass loop) with cm_stride(7) and iterations(1)
// (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25217 
// a_offset: k = 1, 10, 19, 28, 37 with m = 1, n = 4, ks(3) and
// a_offset(43) (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .a_offset(43)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25234 
// zero: k = 1, 10, 19, 28, 37 and zero_index = 0 (single-pass loop) with
// m = 1, n = 4, ks(3) and a_offset(43) (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(4).k(k_dim)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25254 
// qmin: single m = 1, n = 4, k = 8 run with qmin(128)
// (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
25268 
// qmax: single m = 1, n = 4, k = 8 run with qmax(128)
// (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
25282 
// strided_cm: single m = 1, n = 4, k = 8 run with cm_stride(7)
// (mr=1, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
25296 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
25297 
25298 
25299 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single m = 3, n = 4, k = 8 run (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
25312 
// strided_cn: single m = 3, n = 4, k = 8 run with cn_stride(7)
// (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
25326 
// k_eq_8_subtile: n = 1..4 and m = 1..3 at k = 8 with iterations(1)
// (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25344 
// k_eq_8_subtile_m: m = 1..3 with n = 4, k = 8 and iterations(1)
// (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
25360 
// k_eq_8_subtile_n: n = 1..4 with m = 3, k = 8 and iterations(1)
// (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
25376 
// k_lt_8: k = 1..7 with m = 3, n = 4 (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
25391 
// k_lt_8_subtile: k = 1..7, n = 1..4 and m = 1..3 with iterations(1)
// (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25411 
// k_gt_8: k = 9..15 with m = 3, n = 4 (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
25426 
// k_gt_8_subtile: k = 9..15, n = 1..4 and m = 1..3 with iterations(1)
// (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25446 
// k_div_8: k = 16, 24, ..., 80 with m = 3, n = 4
// (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
25461 
// k_div_8_subtile: k = 16, 24, ..., 80, n = 1..4 and m = 1..3 with
// iterations(1) (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25481 
// n_gt_4: n = 5..7 and k = 1, 10, 19, 28, 37 with m = 3
// (mr=3, nr=4, kr=2, sr=4).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25498 
// n_gt_4_strided_cn: same sweep as n_gt_4 (n = 5..7, k = 1, 10, ..., 37)
// but with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
25516 
// n_gt_4_subtile: n = 5..7, k = 1, 10, ..., 37, and every m in [1, 3];
// iterations is set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25536 
// n_div_4: n = 8 and 12 (multiples of nr=4) with m=3, for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
25553 
// n_div_4_strided_cn: same sweep as n_div_4 (n = 8, 12; k = 1, 10, ..., 37)
// but with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
25571 
// n_div_4_subtile: n = 8, 12; k = 1, 10, ..., 37; every m in [1, 3];
// iterations is set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25591 
// small_kernel: full 3x4 tile with ks set to 3, for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
25607 
// small_kernel_subtile: ks set to 3, k = 1, 10, ..., 37, and every (m, n)
// with m in [1, 3] and n in [1, 4]; iterations is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25628 
// n_gt_4_small_kernel: n = 5..7 with m=3 and ks set to 3,
// for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
25646 
// n_div_4_small_kernel: n = 8, 12 with m=3 and ks set to 3,
// for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
25664 
// strided_cm_subtile: cm_stride set to 7, k = 1, 10, ..., 37, and every
// (m, n) with m in [1, 3] and n in [1, 4]; iterations is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25685 
// a_offset: full 3x4 tile with ks set to 3 and a_offset set to 127,
// for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
25702 
// zero: full 3x4 tile with ks=3 and a_offset=127; zero_index takes each
// value 0, 1, 2 for every k in 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
25722 
// qmin: single run of the full 3x4 tile at k=8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
25736 
// qmax: single run of the full 3x4 tile at k=8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
25750 
// strided_cm: single run of the full 3x4 tile at k=8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld64, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
25764 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
25765 
25766 
25767 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run of the full 3x4 tile (m=3, n=4) at k=8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
25780 
// strided_cn: single run of the full 3x4 tile at k=8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
25794 
// k_eq_8_subtile: k=8 with every (m, n), m in [1, 3] and n in [1, 4];
// iterations is set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25812 
// k_eq_8_subtile_m: k=8 and n=4 with m varied over [1, 3];
// iterations is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25828 
// k_eq_8_subtile_n: k=8 and m=3 with n varied over [1, 4];
// iterations is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25844 
// k_lt_8: full 3x4 tile for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25859 
// k_lt_8_subtile: every k in [1, 7] with every (m, n), m in [1, 3] and
// n in [1, 4]; iterations is set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25879 
// k_gt_8: full 3x4 tile for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25894 
// k_gt_8_subtile: every k in [9, 15] with every (m, n), m in [1, 3] and
// n in [1, 4]; iterations is set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25914 
// k_div_8: runs the full 3x4 tile (m=3, n=4) for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
25929 
// k_div_8_subtile: for k = 16, 24, ..., 80, runs every (m, n) with
// m in [1, 3] and n in [1, 4]; iterations is set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
25949 
// n_gt_4: n = 5, 6, 7 (above nr=4) with m=3, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25966 
// n_gt_4_strided_cn: same sweep as n_gt_4 (n = 5..7, k = 1, 10, ..., 37)
// but with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
25984 
// n_gt_4_subtile: n = 5..7, k = 1, 10, ..., 37, and every m in [1, 3];
// iterations is set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26004 
// n_div_4: n = 8 and 12 (multiples of nr=4) with m=3, for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26021 
// n_div_4_strided_cn: same sweep as n_div_4 (n = 8, 12; k = 1, 10, ..., 37)
// but with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26039 
// n_div_4_subtile: n = 8, 12; k = 1, 10, ..., 37; every m in [1, 3];
// iterations is set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26059 
// small_kernel: full 3x4 tile with ks set to 3, for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
26075 
// small_kernel_subtile: ks set to 3, k = 1, 10, ..., 37, and every (m, n)
// with m in [1, 3] and n in [1, 4]; iterations is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26096 
// n_gt_4_small_kernel: n = 5..7 with m=3 and ks set to 3,
// for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26114 
// n_div_4_small_kernel: n = 8, 12 with m=3 and ks set to 3,
// for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26132 
// strided_cm_subtile: cm_stride set to 7, k = 1, 10, ..., 37, and every
// (m, n) with m in [1, 3] and n in [1, 4]; iterations is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26153 
// a_offset: full 3x4 tile with ks set to 3 and a_offset set to 127,
// for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
26170 
// zero: full 3x4 tile with ks=3 and a_offset=127; zero_index takes each
// value 0, 1, 2 for every k in 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26190 
// qmin: single run of the full 3x4 tile at k=8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
26204 
// qmax: single run of the full 3x4 tile at k=8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
26218 
// strided_cm: single run of the full 3x4 tile at k=8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
26232 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
26233 
26234 
26235 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run of the full 2x4 tile (m=2, n=4) at k=8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
26248 
// strided_cn: single run of the full 2x4 tile at k=8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(4)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
26262 
// k_eq_8_subtile: k=8 with every (m, n), m in [1, 2] and n in [1, 4];
// iterations is set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
26280 
// k_eq_8_subtile_m: k=8 and n=4 with m varied over [1, 2];
// iterations is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
26296 
// k_eq_8_subtile_n: k=8 and m=2 with n varied over [1, 4];
// iterations is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
26312 
// k_lt_8: full 2x4 tile for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
26327 
// k_lt_8_subtile: every k in [1, 7] with every (m, n), m in [1, 2] and
// n in [1, 4]; iterations is set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
26347 
// k_gt_8: full 2x4 tile for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(k_size)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
26362 
// k_gt_8_subtile: for each k in 9..15, tries every n in 1..4 and m in 1..2 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26382 
// k_div_8: runs the kernel at m=2, n=4 for k = 16, 24, ..., 80 (steps of 8).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
26397 
// k_div_8_subtile: for k = 16, 24, ..., 80, tries every n in 1..4 and m in 1..2 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26417 
// n_gt_4: n takes 5, 6, 7 (above nr=4); for each n, k takes 1, 10, 19, 28, 37 at m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26434 
// n_gt_4_strided_cn: same n (5..7) and k (1, 10, ..., 37) sweep as n_gt_4, with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26452 
// n_gt_4_subtile: n in 5..7, k in {1, 10, 19, 28, 37}, and every m in 1..2, with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26472 
// n_div_4: n takes 8 and 12 (multiples of nr=4); for each n, k takes 1, 10, 19, 28, 37 at m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26489 
// n_div_4_strided_cn: same n (8, 12) and k (1, 10, ..., 37) sweep as n_div_4, with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26507 
// n_div_4_subtile: n in {8, 12}, k in {1, 10, 19, 28, 37}, and every m in 1..2, with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26527 
// small_kernel: m=2, n=4 with ks(3), sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
26543 
// small_kernel_subtile: ks(3) with k in {1, 10, 19, 28, 37}, every n in 1..4 and m in 1..2, iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26564 
// n_gt_4_small_kernel: ks(3) with n in 5..7 and k in {1, 10, 19, 28, 37} at m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26582 
// n_div_4_small_kernel: ks(3) with n in {8, 12} and k in {1, 10, 19, 28, 37} at m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26600 
// strided_cm_subtile: cm_stride(7) with k in {1, 10, 19, 28, 37}, every n in 1..4 and m in 1..2, iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26621 
// a_offset: m=2, n=4 with ks(3) and a_offset(83), sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
26638 
// zero: ks(3) and a_offset(83) with k in {1, 10, 19, 28, 37}; zero_index is set to 0 and then 1 for each k.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi <= 1; ++zi) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(4).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zi)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26658 
// qmin: single run at m=2, n=4, k=8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
26672 
// qmax: single run at m=2, n=4, k=8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
26686 
// strided_cm: single run at m=2, n=4, k=8 with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
26700 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
26701 
26702 
26703 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run at m=4, n=4, k=8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
26716 
// strided_cn: single run at m=4, n=4, k=8 with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
26730 
// k_eq_8_subtile: k=8 with every n in 1..4 (outer loop) and m in 1..4 (inner loop), iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26748 
// k_eq_8_subtile_m: k=8, n=4, with m swept over 1..4 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
26764 
// k_eq_8_subtile_n: k=8, m=4, with n swept over 1..4 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
26780 
// k_lt_8: runs the kernel at m=4, n=4 for every k from 1 through 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 7; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
26795 
// k_lt_8_subtile: for each k in 1..7, tries every n in 1..4 and m in 1..4 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26815 
// k_gt_8: runs the kernel at m=4, n=4 for every k from 9 through 15.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc <= 15; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
26830 
// k_gt_8_subtile: for each k in 9..15, tries every n in 1..4 and m in 1..4 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26850 
// k_div_8: runs the kernel at m=4, n=4 for k = 16, 24, ..., 80 (steps of 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
26865 
// k_div_8_subtile: for k = 16, 24, ..., 80, tries every n in 1..4 and m in 1..4 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26885 
// n_gt_4: n takes 5, 6, 7 (above nr=4); for each n, k takes 1, 10, 19, 28, 37 at m=4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26902 
// n_gt_4_strided_cn: same n (5..7) and k (1, 10, ..., 37) sweep as n_gt_4, with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26920 
// n_gt_4_subtile: n in 5..7, k in {1, 10, 19, 28, 37}, and every m in 1..4, with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26940 
// n_div_4: n takes 8 and 12 (multiples of nr=4); for each n, k takes 1, 10, 19, 28, 37 at m=4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26957 
// n_div_4_strided_cn: same n (8, 12) and k (1, 10, ..., 37) sweep as n_div_4, with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26975 
// n_div_4_subtile: n in {8, 12}, k in {1, 10, 19, 28, 37}, and every m in 1..4, with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26995 
// small_kernel: m=4, n=4 with ks(3), sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27011 
// small_kernel_subtile: ks(3) with k in {1, 10, 19, 28, 37}, every n in 1..4 and m in 1..4, iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27032 
// n_gt_4_small_kernel: ks(3) with n in 5..7 and k in {1, 10, 19, 28, 37} at m=4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc <= 7; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27050 
// n_div_4_small_kernel: ks(3) with n in {8, 12} and k in {1, 10, 19, 28, 37} at m=4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27068 
// strided_cm_subtile: cm_stride(7) with k in {1, 10, 19, 28, 37}, every n in 1..4 and m in 1..4, iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27089 
// a_offset: m=4, n=4 with ks(3) and a_offset(163), sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27106 
// zero: ks(3) and a_offset(163) with k in {1, 10, 19, 28, 37}; zero_index is set to 0..3 in turn for each k.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi <= 3; ++zi) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zi)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27126 
// qmin: single run at m=4, n=4, k=8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27140 
// qmax: single run at m=4, n=4, k=8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27154 
// strided_cm: single run at m=4, n=4, k=8 with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27168 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
27169 
27170 
27171 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run at m=1, n=4, k=8; the AVX kernel is paired with the sse4 params initializer.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27184 
// strided_cn: single run at m=1, n=4, k=8 with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27198 
// k_eq_8_subtile: k=8 with every n in 1..4; the inner m loop covers only m=1 because mr=1. iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27216 
// Sweeps m over 1..1 with n = 4 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27232 
// Sweeps n over 1..4 with m = 1 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27248 
// Full 1x4 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27263 
// Every (m, n) with m in 1..1 and n in 1..4 for every k in 1..7, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27283 
// Full 1x4 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27298 
// Every (m, n) with m in 1..1 and n in 1..4 for every k in 9..15, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27318 
// Full 1x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27333 
// Every (m, n) with m in 1..1 and n in 1..4 for k = 16, 24, ..., 80, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27353 
// n in 5..7 (above nr = 4) with m = 1, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27370 
// n in 5..7 with m = 1 and cn_stride = 7, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27388 
// n in 5..7 and m in 1..1, for k = 1, 10, 19, 28, 37, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27408 
// n = 8 and 12 (multiples of nr = 4) with m = 1, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27425 
// n = 8 and 12 with m = 1 and cn_stride = 7, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27443 
// n = 8 and 12 and m in 1..1, for k = 1, 10, 19, 28, 37, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27463 
// Full 1x4 tile with ks = 3, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27479 
// Every (m, n) with m in 1..1 and n in 1..4 with ks = 3, for k = 1, 10, 19, 28, 37, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27500 
// n in 5..7 with m = 1 and ks = 3, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27518 
// n = 8 and 12 with m = 1 and ks = 3, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27536 
// Every (m, n) with m in 1..1 and n in 1..4 with cm_stride = 7, for k = 1, 10, 19, 28, 37, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27557 
// Full 1x4 tile with ks = 3 and a_offset = 43, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27574 
// Full 1x4 tile with ks = 3 and a_offset = 43, sweeping zero_index over 0..0, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27594 
// Full 1x4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
27608 
// Full 1x4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
27622 
// Full 1x4 tile at k = 8 with cm_stride overridden to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
27636 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
27637 
27638 
27639 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 2x4 tile (m = mr, n = nr) at k = 8 with no stride, offset, or clamp overrides.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
27652 
// Full 2x4 tile at k = 8 with cn_stride overridden to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
27666 
// Every (m, n) with 1 <= m <= 2 and 1 <= n <= 4 at k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27684 
// Sweeps m over 1..2 with n = 4 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27700 
// Sweeps n over 1..4 with m = 2 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27716 
// Full 2x4 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27731 
// Every (m, n) with m in 1..2 and n in 1..4 for every k in 1..7, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27751 
// Full 2x4 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27766 
// Every (m, n) with m in 1..2 and n in 1..4 for every k in 9..15, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27786 
// Full 2x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27801 
// Every (m, n) with m in 1..2 and n in 1..4 for k = 16, 24, ..., 80, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27821 
// n in 5..7 (above nr = 4) with m = 2, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27838 
// n in 5..7 with m = 2 and cn_stride = 7, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27856 
// n in 5..7 and m in 1..2, for k = 1, 10, 19, 28, 37, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27876 
// n = 8 and 12 (multiples of nr = 4) with m = 2, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27893 
// n = 8 and 12 with m = 2 and cn_stride = 7, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27911 
// n = 8 and 12 and m in 1..2, for k = 1, 10, 19, 28, 37, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27931 
// Full 2x4 tile with ks = 3, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
27947 
// Every (m, n) with m in 1..2 and n in 1..4 with ks = 3, for k = 1, 10, 19, 28, 37, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
27968 
// n in 5..7 with m = 2 and ks = 3, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
27986 
// n = 8 and 12 with m = 2 and ks = 3, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
28004 
// Every (m, n) with m in 1..2 and n in 1..4 with cm_stride = 7, for k = 1, 10, 19, 28, 37, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
28025 
// Full 2x4 tile with ks = 3 and a_offset = 83, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
28042 
// Full 2x4 tile with ks = 3 and a_offset = 83, sweeping zero_index over 0..1, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
28062 
// Single configuration m=2, n=4, k=8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28076 
// Single configuration m=2, n=4, k=8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28090 
// Single configuration m=2, n=4, k=8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28104 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
28105 
28106 
28107 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration m=3, n=4, k=8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28120 
// Single configuration m=3, n=4, k=8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28134 
// At k=8, covers every (n, m) with n <= 4 and m <= 3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28152 
// At k=8 and n=4, varies m from 1 to 3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28168 
// At k=8 and m=3, varies n from 1 to 4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28184 
// Varies k from 1 to 7 at m=3, n=4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28199 
// Varies k from 1 to 7 and covers every (n, m) with n <= 4 and m <= 3,
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28219 
// Varies k from 9 to 15 at m=3, n=4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28234 
// Varies k from 9 to 15 and covers every (n, m) with n <= 4 and m <= 3,
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28254 
// Varies k over the multiples of 8 from 16 to 80 at m=3, n=4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28269 
// Varies k over the multiples of 8 from 16 to 80 and covers every (n, m)
// with n <= 4 and m <= 3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28289 
// Varies n from 5 to 7 and k over {1, 10, 19, 28, 37} at m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28306 
// Varies n from 5 to 7 and k over {1, 10, 19, 28, 37} at m=3 with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28324 
// Varies n from 5 to 7, k over {1, 10, 19, 28, 37} and m from 1 to 3,
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28344 
// Varies n over {8, 12} and k over {1, 10, 19, 28, 37} at m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28361 
// Varies n over {8, 12} and k over {1, 10, 19, 28, 37} at m=3 with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28379 
// Varies n over {8, 12}, k over {1, 10, 19, 28, 37} and m from 1 to 3,
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28399 
// Varies k over {1, 10, 19, 28, 37} at m=3, n=4 with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28415 
// With ks=3, varies k over {1, 10, 19, 28, 37} and covers every (n, m)
// with n <= 4 and m <= 3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28436 
// With ks=3, varies n from 5 to 7 and k over {1, 10, 19, 28, 37} at m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28454 
// With ks=3, varies n over {8, 12} and k over {1, 10, 19, 28, 37} at m=3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28472 
// Sweeps k over {1, 10, 19, 28, 37} and every (n, m) with n <= 4, m <= 3,
// using cm_stride 7 and a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28493 
// Sweeps k over {1, 10, 19, 28, 37} at m=3, n=4 with ks=3 and a_offset=127.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28510 
// Sweeps k over {1, 10, 19, 28, 37} and zero_index over {0, 1, 2} at m=3,
// n=4 with ks=3 and a_offset=127.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(4).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28530 
// Single configuration m=3, n=4, k=8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28544 
// Single configuration m=3, n=4, k=8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28558 
// Single configuration m=3, n=4, k=8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28572 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
28573 
28574 
28575 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration m=2, n=4, k=8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
28588 
// Single configuration m=2, n=4, k=8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
28602 
// At k=8, covers every (n, m) with n <= 4 and m <= 2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28620 
// At k=8 and n=4, varies m from 1 to 2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
28636 
// At k=8 and m=2, varies n from 1 to 4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
28652 
// Varies k from 1 to 7 at m=2, n=4.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(kc)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
28667 
// Varies k from 1 to 7 and covers every (n, m) with n <= 4 and m <= 2,
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28687 
// Varies k from 9 to 15 at m=2, n=4.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(kc)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
28702 
// Varies k from 9 to 15 and covers every (n, m) with n <= 4 and m <= 2,
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28722 
// Varies k over the multiples of 8 from 16 to 80 at m=2, n=4.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(kc)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
28737 
// Varies k over the multiples of 8 from 16 to 80 and covers every (n, m)
// with n <= 4 and m <= 2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28757 
// Varies n from 5 to 7 and k over {1, 10, 19, 28, 37} at m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28774 
// Varies n from 5 to 7 and k over {1, 10, 19, 28, 37} at m=2 with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28792 
// Varies n from 5 to 7, k over {1, 10, 19, 28, 37} and m from 1 to 2,
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28812 
// Varies n over {8, 12} and k over {1, 10, 19, 28, 37} at m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28829 
// Varies n over {8, 12} and k over {1, 10, 19, 28, 37} at m=2 with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28847 
// Varies n over {8, 12}, k over {1, 10, 19, 28, 37} and m from 1 to 2,
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28867 
// Varies k over {1, 10, 19, 28, 37} at m=2, n=4 with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
28883 
// With ks=3, varies k over {1, 10, 19, 28, 37} and covers every (n, m)
// with n <= 4 and m <= 2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28904 
// With ks=3, varies n from 5 to 7 and k over {1, 10, 19, 28, 37} at m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28922 
// With ks=3, varies n over {8, 12} and k over {1, 10, 19, 28, 37} at m=2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28940 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 1, 10, 19, 28, 37; every n in [1, 4] and m in [1, 2] with cm_stride(7), one iteration per case.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28961 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Full 2x4 tile at k = 1, 10, 19, 28, 37 with ks(3) and a_offset(83).
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28978 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  // Full 2x4 tile at k = 1, 10, 19, 28, 37 with ks(3) and a_offset(83); zero_index takes 0 and 1.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(depth)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_row)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28998 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Single full-tile case (m = 2, n = 4, k = 8) with qmin(128).
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29012 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Single full-tile case (m = 2, n = 4, k = 8) with qmax(128).
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29026 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  // Single full-tile case (m = 2, n = 4, k = 8) with cm_stride(7).
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29040 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
29041 
29042 
29043 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  // Single full-tile case: m = 2, n = 4, k = 8.
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29056 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // Single full-tile case (m = 2, n = 4, k = 8) with cn_stride(7).
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29070 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k fixed at 8; every n in [1, 4] and m in [1, 2], one iteration per case.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29088 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  // k = 8 and n = 4 fixed; m takes 1 and 2, one iteration per case.
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29104 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  // k = 8 and m = 2 fixed; n takes 1 through 4, one iteration per case.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29120 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 2x4 tile for each k from 1 through 7.
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29135 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k from 1 through 7; every n in [1, 4] and m in [1, 2], one iteration per case.
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29155 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 2x4 tile for each k from 9 through 15.
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29170 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k from 9 through 15; every n in [1, 4] and m in [1, 2], one iteration per case.
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29190 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 2x4 tile for k = 16, 24, ..., 80 (step 8).
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29205 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k = 16, 24, ..., 80; every n in [1, 4] and m in [1, 2], one iteration per case.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29225 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  // n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37; m fixed at 2.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29242 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37; m fixed at 2, cn_stride(7).
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29260 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // n = 5, 6, 7; k = 1, 10, 19, 28, 37; m takes 1 and 2, one iteration per case.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29280 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37; m fixed at 2.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29297 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37; m fixed at 2, cn_stride(7).
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29315 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12; k = 1, 10, 19, 28, 37; m takes 1 and 2, one iteration per case.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29335 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // Full 2x4 tile at k = 1, 10, 19, 28, 37 with ks(3).
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29351 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k = 1, 10, 19, 28, 37 with ks(3); every n in [1, 4] and m in [1, 2], one iteration per case.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29372 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37; m fixed at 2, ks(3).
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29390 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37; m fixed at 2, ks(3).
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29408 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k = 1, 10, 19, 28, 37; every n in [1, 4] and m in [1, 2] with cm_stride(7), one iteration per case.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29429 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  // Full 2x4 tile at k = 1, 10, 19, 28, 37 with ks(3) and a_offset(83).
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
29446 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  // Full 2x4 tile at k = 1, 10, 19, 28, 37 with ks(3) and a_offset(83); zero_index takes 0 and 1.
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(depth)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_row)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29466 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  // Single full-tile case (m = 2, n = 4, k = 8) with qmin(128).
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29480 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  // Single full-tile case (m = 2, n = 4, k = 8) with qmax(128).
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29494 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  // Single full-tile case (m = 2, n = 4, k = 8) with cm_stride(7).
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29508 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
29509 
29510 
29511 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single full-tile case: m = 3, n = 4, k = 8.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29524 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Single full-tile case (m = 3, n = 4, k = 8) with cn_stride(7).
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
29538 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k fixed at 8; every n in [1, 4] and m in [1, 3], one iteration per case.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29556 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8 and n = 4 fixed; m takes 1 through 3, one iteration per case.
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29572 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8 and m = 3 fixed; n takes 1 through 4, one iteration per case.
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29588 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 3x4 tile for each k from 1 through 7.
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29603 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k from 1 through 7; every n in [1, 4] and m in [1, 3], one iteration per case.
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29623 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 3x4 tile for each k from 9 through 15.
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29638 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k from 9 through 15; every n in [1, 4] and m in [1, 3], one iteration per case.
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29658 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 3x4 tile for k = 16, 24, ..., 80 (step 8).
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29673 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 16, 24, ..., 80; every n in [1, 4] and m in [1, 3], one iteration per case.
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29693 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37; m fixed at 3.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29710 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37; m fixed at 3, cn_stride(7).
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29728 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n = 5, 6, 7; k = 1, 10, 19, 28, 37; m takes 1 through 3, one iteration per case.
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29748 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37; m fixed at 3.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29765 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12 crossed with k = 1, 10, 19, 28, 37; m fixed at 3, cn_stride(7).
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29783 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12; k = 1, 10, 19, 28, 37; m takes 1 through 3, one iteration per case.
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29803 
// Full 3x4 tile with ks(3), swept over k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
29819 
// ks(3) with every m in 1..3 and n in 1..4, for k = 1, 10, ..., 37;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29840 
// ks(3) with n = 5, 6, 7 (just above nr = 4), m = 3, and
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29858 
// ks(3) with n = 8 and 12 (multiples of nr = 4), m = 3, and
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29876 
// cm_stride(7) with every m in 1..3 and n in 1..4, for k = 1, 10, ..., 37;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29897 
// Full 3x4 tile with ks(3) and a_offset(127), swept over
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
29914 
// Full 3x4 tile with ks(3) and a_offset(127); zero_index takes each value
// 0, 1, 2 for every k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(4).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29934 
// Single full-tile case (m = 3, n = 4, k = 8) with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
29948 
// Single full-tile case (m = 3, n = 4, k = 8) with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
29962 
// Single full-tile case (m = 3, n = 4, k = 8) with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
29976 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
29977 
29978 
29979 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full-tile case: m = 3, n = 4, k = 8 (k equal to kr).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29992 
// Single full-tile case (m = 3, n = 4, k = 8) with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30006 
// k = 8 with every n in 1..4 crossed with every m in 1..3;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30024 
// k = 8 and n = 4 with m varied over 1..3; iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30040 
// k = 8 and m = 3 with n varied over 1..4; iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30056 
// Full 3x4 tile with every k in 1..7 (below kr = 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30071 
// Every k in 1..7 crossed with n in 1..4 and m in 1..3;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30091 
// Full 3x4 tile with every k in 9..15 (strictly between kr = 8 and 16).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30106 
// Every k in 9..15 crossed with n in 1..4 and m in 1..3;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30126 
// Full 3x4 tile with k = 16, 24, ..., 80 (multiples of kr = 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30141 
// k = 16, 24, ..., 80 crossed with n in 1..4 and m in 1..3;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30161 
// m = 3 with n = 5, 6, 7 (just above nr = 4) and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30178 
// m = 3 with n = 5, 6, 7 and k = 1, 10, ..., 37; every configuration
// also passes cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30196 
// n = 5, 6, 7 crossed with k = 1, 10, ..., 37 and every m in 1..3;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30216 
// Full-height tile (m = 3) at n = 8 and 12, both multiples of nr = 4,
// with k stepping through 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30233 
// m = 3 with n = 8 and 12 and k = 1, 10, ..., 37; every configuration
// also passes cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30251 
// n = 8 and 12 crossed with k = 1, 10, ..., 37 and every m in 1..3;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30271 
// Full 3x4 tile with ks(3), swept over k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30287 
// ks(3) with every m in 1..3 and n in 1..4, for k = 1, 10, ..., 37;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30308 
// ks(3) with n = 5, 6, 7 (just above nr = 4), m = 3, and
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30326 
// ks(3) with n = 8 and 12 (multiples of nr = 4), m = 3, and
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30344 
// cm_stride(7) with every m in 1..3 and n in 1..4, for k = 1, 10, ..., 37;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30365 
// Full 3x4 tile with ks(3) and a_offset(127), swept over
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30382 
// Full 3x4 tile with ks(3) and a_offset(127); zero_index takes each value
// 0, 1, 2 for every k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(4).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30402 
// Single full-tile case (m = 3, n = 4, k = 8) with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30416 
// Single full-tile case (m = 3, n = 4, k = 8) with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30430 
// Single full-tile case (m = 3, n = 4, k = 8) with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30444 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
30445 
30446 
30447 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full-tile case: m = 3, n = 4, k = 8 (k equal to kr).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30460 
// Single full-tile case (m = 3, n = 4, k = 8) with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
30474 
// k = 8 with every n in 1..4 crossed with every m in 1..3;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30492 
// k = 8 and n = 4 with m varied over 1..3; iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30508 
// k = 8 and m = 3 with n varied over 1..4; iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30524 
// Full 3x4 tile with every k in 1..7 (below kr = 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30539 
// Every k in 1..7 crossed with n in 1..4 and m in 1..3;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30559 
// Full 3x4 tile with every k in 9..15 (strictly between kr = 8 and 16).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30574 
// Every k in 9..15 crossed with n in 1..4 and m in 1..3;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30594 
// Full 3x4 tile with k = 16, 24, ..., 80 (multiples of kr = 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
30609 
// k = 16, 24, ..., 80 crossed with n in 1..4 and m in 1..3;
// each configuration is run with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(8).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30629 
// m = 3 with n = 5, 6, 7 (just above nr = 4) and k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30646 
// m = 3 with n = 5, 6, 7 and k = 1, 10, ..., 37; every configuration
// also passes cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30664 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n = 5, 6, 7; k = 1, 10, ..., 37; every m in [1, 3]; iterations set to 1.
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
30684 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12 (multiples of nr = 4); k = 1, 10, ..., 37; m fixed at 3.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
30701 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12; k = 1, 10, ..., 37; m fixed at 3; cn_stride set to 7.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
30719 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12; k = 1, 10, ..., 37; every m in [1, 3]; iterations set to 1.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
30739 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // k = 1, 10, ..., 37; full 3x4 tile; ks set to 3.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
30755 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k = 1, 10, ..., 37; every n in [1, 4] and m in [1, 3]; ks set to 3; iterations set to 1.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
30776 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // n = 5, 6, 7; k = 1, 10, ..., 37; m fixed at 3; ks set to 3.
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
30794 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12; k = 1, 10, ..., 37; m fixed at 3; ks set to 3.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
30812 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k = 1, 10, ..., 37; every n in [1, 4] and m in [1, 3]; cm_stride set to 7; iterations set to 1.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
30833 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  // k = 1, 10, ..., 37; full 3x4 tile; ks set to 3 and a_offset set to 127.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.ks(3).a_offset(127);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
30850 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  // k = 1, 10, ..., 37; zero_index takes each of 0, 1, 2; ks set to 3 and a_offset set to 127.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zi = 0; zi <= 2; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(4).k(k_size);
      tester.ks(3).a_offset(127).zero_index(zi);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
30870 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with k = 8 and qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
30884 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with k = 8 and qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
30898 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with k = 8 and cm_stride set to 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
30912 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
30913 
30914 
30915 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 2x4 tile with k = 8 (equal to kr).
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
    xnn_init_qc8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
30928 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Full 2x4 tile with k = 8 and cn_stride set to 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
    xnn_init_qc8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
30942 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8; every n in [1, 4] and m in [1, 2]; iterations set to 1.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
30960 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8 and n = 4; every m in [1, 2]; iterations set to 1.
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
30976 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8 and m = 2; every n in [1, 4]; iterations set to 1.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
30992 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Every k in [1, 7] (below kr = 8) on the full 2x4 tile.
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
31007 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every k in [1, 7], n in [1, 4] and m in [1, 2]; iterations set to 1.
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
31027 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Every k in [9, 15] (above kr = 8) on the full 2x4 tile.
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
31042 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every k in [9, 15], n in [1, 4] and m in [1, 2]; iterations set to 1.
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
31062 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // k = 16, 24, ..., 80 (multiples of kr = 8) on the full 2x4 tile.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
31077 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 16, 24, ..., 80; every n in [1, 4] and m in [1, 2]; iterations set to 1.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
31097 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // n = 5, 6, 7 (above nr = 4); k = 1, 10, ..., 37; m fixed at 2.
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
31114 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n = 5, 6, 7; k = 1, 10, ..., 37; m fixed at 2; cn_stride set to 7.
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
31132 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n = 5, 6, 7; k = 1, 10, ..., 37; every m in [1, 2]; iterations set to 1.
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
31152 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12 (multiples of nr = 4); k = 1, 10, ..., 37; m fixed at 2.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
31169 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12; k = 1, 10, ..., 37; m fixed at 2; cn_stride set to 7.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
31187 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12; k = 1, 10, ..., 37; every m in [1, 2]; iterations set to 1.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
31207 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // k = 1, 10, ..., 37; full 2x4 tile; ks set to 3.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
31223 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 1, 10, ..., 37; every n in [1, 4] and m in [1, 2]; ks set to 3; iterations set to 1.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
31244 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // n = 5, 6, 7; k = 1, 10, ..., 37; m fixed at 2; ks set to 3.
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
31262 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8 and 12; k = 1, 10, ..., 37; m fixed at 2; ks set to 3.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
31280 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 1, 10, ..., 37; every n in [1, 4] and m in [1, 2]; cm_stride set to 7; iterations set to 1.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
31301 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  // k = 1, 10, ..., 37; full 2x4 tile; ks set to 3 and a_offset set to 83.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.ks(3).a_offset(83);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
  }
}
31318 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  // k = 1, 10, ..., 37; zero_index takes each of 0, 1; ks set to 3 and a_offset set to 83.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zi = 0; zi <= 1; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(4).k(k_size);
      tester.ks(3).a_offset(83).zero_index(zi);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
31338 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Full 2x4 tile with k = 8 and qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
    xnn_init_qc8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
31352 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Full 2x4 tile with k = 8 and qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
    xnn_init_qc8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
31366 
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  // Full 2x4 tile with k = 8 and cm_stride set to 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
    xnn_init_qc8_conv_minmax_fp32_sse2_params,
    xnn_qs8_requantize_fp32);
}
31380 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
31381 
31382 
31383 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  // Full 3x4 tile with k = 8 (equal to kr).
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
31396 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  // Full 3x4 tile with k = 8 and cn_stride set to 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
    xnn_init_qc8_conv_minmax_fp32_sse4_params,
    xnn_qs8_requantize_fp32);
}
31410 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // k = 8; every n in [1, 4] and m in [1, 3]; iterations set to 1.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
31428 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  // k = 8 and n = 4; every m in [1, 3]; iterations set to 1.
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
31444 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  // k = 8 and m = 3; every n in [1, 4]; iterations set to 1.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
31460 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Every k in [1, 7] (below kr = 8) on the full 3x4 tile.
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
31475 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // Every k in [1, 7], n in [1, 4] and m in [1, 3]; iterations set to 1.
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
31495 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  // Every k in [9, 15] (above kr = 8) on the full 3x4 tile.
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
  }
}
31510 
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  // Every k in [9, 15], n in [1, 4] and m in [1, 3]; iterations set to 1.
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
31530 
// 3x4c8 SSE4.1 (ld128) IGEMM: full 3x4 tile with k taking the multiples of 8
// from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
31545 
// 3x4c8 SSE4.1 (ld128) IGEMM: k over the multiples of 8 in 16..80, crossed
// with every m x n sub-tile up to 3x4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31565 
// 3x4c8 SSE4.1 (ld128) IGEMM: n = 5, 6, 7 (above nr = 4) with m = 3, crossed
// with k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n).k(k);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31582 
// 3x4c8 SSE4.1 (ld128) IGEMM: n = 5, 6, 7 with m = 3 and cn_stride(7),
// crossed with k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n).k(k);
      tester.cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31600 
// 3x4c8 SSE4.1 (ld128) IGEMM: n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37
// and every m in 1..3; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31620 
// 3x4c8 SSE4.1 (ld128) IGEMM: n = 8 and 12 (multiples of nr = 4) with m = 3,
// crossed with k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n).k(k);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31637 
// 3x4c8 SSE4.1 (ld128) IGEMM: n = 8 and 12 with m = 3 and cn_stride(7),
// crossed with k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n).k(k);
      tester.cn_stride(7);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31655 
// 3x4c8 SSE4.1 (ld128) IGEMM: n = 8 and 12 crossed with k = 1, 10, 19, 28, 37
// and every m in 1..3; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31675 
// 3x4c8 SSE4.1 (ld128) IGEMM: full 3x4 tile with ks(3), for
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
31691 
// 3x4c8 SSE4.1 (ld128) IGEMM: ks(3) with k = 1, 10, 19, 28, 37 crossed with
// every m x n sub-tile up to 3x4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31712 
// 3x4c8 SSE4.1 (ld128) IGEMM: n = 5, 6, 7 with m = 3 and ks(3), crossed with
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31730 
// 3x4c8 SSE4.1 (ld128) IGEMM: n = 8 and 12 with m = 3 and ks(3), crossed with
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31748 
// 3x4c8 SSE4.1 (ld128) IGEMM: cm_stride(7) with k = 1, 10, 19, 28, 37 crossed
// with every m x n sub-tile up to 3x4; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                    xnn_init_qc8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31769 
// 3x4c8 SSE4.1 (ld128) IGEMM: full 3x4 tile with ks(3) and a_offset(127), for
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k);
    tester.ks(3).a_offset(127);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
31786 
// 3x4c8 SSE4.1 (ld128) IGEMM: full 3x4 tile with ks(3) and a_offset(127);
// zero_index takes each of 0, 1, 2 for every k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(4).k(k);
      tester.ks(3).a_offset(127).zero_index(mz);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31806 
// 3x4c8 SSE4.1 (ld128) IGEMM: single 3x4 tile, k = 8, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
31820 
// 3x4c8 SSE4.1 (ld128) IGEMM: single 3x4 tile, k = 8, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
31834 
// 3x4c8 SSE4.1 (ld128) IGEMM: single 3x4 tile, k = 8, with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
31848 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
31849 
31850 
31851 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x8c8 AVX2 IGEMM: single full 1x8 tile with k equal to kr = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
}
31864 
// 1x8c8 AVX2 IGEMM: single full 1x8 tile, k = 8, with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
}
31878 
// 1x8c8 AVX2 IGEMM: k = 8 with every n in 1..8 and m in 1..1; each
// configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(m).n(n).k(8).iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
31896 
// 1x8c8 AVX2 IGEMM: k = 8, n = 8, with m swept over 1..1 (a single value,
// since mr = 1); uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m = 1; m <= 1; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(m).n(8).k(8).iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
  }
}
31912 
// 1x8c8 AVX2 IGEMM: k = 8, m = 1, with n swept over 1..8; each configuration
// uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(n).k(8).iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
  }
}
31928 
// 1x8c8 AVX2 IGEMM: full 1x8 tile with k swept over 1..7 (below kr = 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
  }
}
31943 
// 1x8c8 AVX2 IGEMM: k swept over 1..7, crossed with every n in 1..8 and
// m in 1..1; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                    xnn_init_qc8_conv_minmax_fp32_avx2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31963 
// 1x8c8 AVX2 IGEMM: full 1x8 tile with k swept over 9..15, i.e. strictly
// between kr = 8 and 2 * kr.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
  }
}
31978 
// 1x8c8 AVX2 IGEMM: k swept over 9..15, crossed with every n in 1..8 and
// m in 1..1; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                    xnn_init_qc8_conv_minmax_fp32_avx2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
31998 
// 1x8c8 AVX2 IGEMM: full 1x8 tile with k taking the multiples of 8 from 16
// through 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
  }
}
32013 
// 1x8c8 AVX2 IGEMM: k over the multiples of 8 in 16..80, crossed with every
// n in 1..8 and m in 1..1; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                    xnn_init_qc8_conv_minmax_fp32_avx2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
32033 
// 1x8c8 AVX2 IGEMM: n swept over 9..15 (above nr = 8) with m = 1, crossed
// with k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n).k(k);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
32050 
// 1x8c8 AVX2 IGEMM: n swept over 9..15 with m = 1 and cn_stride(11), crossed
// with k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
32068 
// 1x8c8 AVX2 IGEMM: n swept over 9..15, crossed with k = 1, 10, 19, 28, 37
// and m in 1..1; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                    xnn_init_qc8_conv_minmax_fp32_avx2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
32088 
// 1x8c8 AVX2 IGEMM: n = 16 and 24 (multiples of nr = 8) with m = 1, crossed
// with k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n).k(k);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
32105 
// 1x8c8 AVX2 IGEMM: n = 16 and 24 with m = 1 and cn_stride(11), crossed with
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n).k(k);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
32123 
// 1x8c8 AVX2 IGEMM: n = 16 and 24, crossed with k = 1, 10, 19, 28, 37 and
// m in 1..1; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                    xnn_init_qc8_conv_minmax_fp32_avx2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
32143 
// 1x8c8 AVX2 IGEMM: full 1x8 tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
  }
}
32159 
// 1x8c8 AVX2 IGEMM: ks(3) with k = 1, 10, 19, 28, 37 crossed with every
// n in 1..8 and m in 1..1; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                    xnn_init_qc8_conv_minmax_fp32_avx2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
32180 
// 1x8c8 AVX2 IGEMM: n swept over 9..15 with m = 1 and ks(3), crossed with
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
32198 
// 1x8c8 AVX2 IGEMM: n = 16 and 24 with m = 1 and ks(3), crossed with
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n).k(k);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
32216 
// 1x8c8 AVX2 IGEMM: cm_stride(11) with k = 1, 10, 19, 28, 37 crossed with
// every n in 1..8 and m in 1..1; each configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(11).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                    xnn_init_qc8_conv_minmax_fp32_avx2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
32237 
// 1x8c8 AVX2 IGEMM: full 1x8 tile with ks(3) and a_offset(43), for
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, a_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k);
    tester.ks(3).a_offset(43);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
  }
}
32254 
// 1x8c8 AVX2 IGEMM: full 1x8 tile with ks(3) and a_offset(43); zero_index
// takes the single value 0 for every k in 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; ++mz) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(8).k(k);
      tester.ks(3).a_offset(43).zero_index(mz);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
32274 
// 1x8c8 AVX2 IGEMM: single 1x8 tile, k = 8, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
}
32288 
// 1x8c8 AVX2 IGEMM: single 1x8 tile, k = 8, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
}
32302 
// 1x8c8 AVX2 IGEMM: single 1x8 tile, k = 8, with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
}
32316 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
32317 
32318 
32319 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x8c8 AVX2 IGEMM: single full 3x8 tile with k equal to kr = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
}
32332 
// 3x8c8 AVX2 IGEMM: single full 3x8 tile, k = 8, with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
}
32346 
// 3x8c8 AVX2 IGEMM: k = 8 with every m x n sub-tile up to 3x8; each
// configuration uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(m).n(n).k(8).iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                  xnn_init_qc8_conv_minmax_fp32_avx2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
32364 
// 3x8c8 AVX2 IGEMM: k = 8, n = 8, with m swept over 1..3; each configuration
// uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(m).n(8).k(8).iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
  }
}
32380 
// 3x8c8 AVX2 IGEMM: k = 8, m = 3, with n swept over 1..8; each configuration
// uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(n).k(8).iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
  }
}
32396 
// 3x8c8 AVX2 IGEMM: full 3x8 tile with k swept over 1..7 (below kr = 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
  }
}
32411 
// k in 1..7 combined with every (m, n) pair, n in 1..8 and m in 1..3;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
32431 
// Full 3x8 output with k taking each value from 9 up to (not including) 16.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(8).sr(1)
        .m(3).n(8).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
32446 
// k in 9..15 combined with every (m, n) pair, n in 1..8 and m in 1..3;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
32466 
// Full 3x8 output with k stepping from 16 to 80 in increments of 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(8).sr(1)
        .m(3).n(8).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
32481 
// k stepping 16..80 by 8, combined with every (m, n) pair, n in 1..8 and
// m in 1..3; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
32501 
// m = 3 with n in 9..15; for each n, k steps from 1 to 40 in increments of 9.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(8).sr(1)
          .m(3).n(cols).k(depth)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
32518 
// m = 3 with n in 9..15 and k stepping 1..40 by 9, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(8).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
32536 
// n in 9..15, k stepping 1..40 by 9, and m in 1..3; one iteration per
// combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
32556 
// m = 3 with n = 16 and n = 24; for each n, k steps from 1 to 40 by 9.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(8).sr(1)
          .m(3).n(cols).k(depth)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
32573 
// m = 3 with n = 16 and n = 24, k stepping 1..40 by 9, cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(8).sr(1)
          .m(3).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
32591 
// n = 16 and n = 24, k stepping 1..40 by 9, and m in 1..3; one iteration
// per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
32611 
// Full 3x8 output with ks set to 3; k steps from 1 to 40 in increments of 9.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(8).sr(1)
        .m(3).n(8).k(depth)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
32627 
// ks = 3 with k stepping 1..40 by 9, combined with every (m, n) pair,
// n in 1..8 and m in 1..3; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
32648 
// ks = 3 and m = 3, with n in 9..15 and k stepping 1..40 by 9.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(8).sr(1)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
32666 
// ks = 3 and m = 3, with n = 16 and n = 24 and k stepping 1..40 by 9.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(8).sr(1)
          .m(3).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
32684 
// cm_stride = 11 with k stepping 1..40 by 9, combined with every (m, n)
// pair, n in 1..8 and m in 1..3; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
                xnn_init_qc8_conv_minmax_fp32_avx2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
32705 
// Full 3x8 output with ks = 3 and a_offset = 127; k steps 1..40 by 9.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, a_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(8).sr(1)
        .m(3).n(8).k(depth)
        .ks(3)
        .a_offset(127)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
  }
}
32722 
// ks = 3 and a_offset = 127 as in a_offset, additionally setting zero_index
// to each of 0, 1, 2; k steps 1..40 by 9.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(8).sr(1)
          .m(3).n(8).k(depth)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
              xnn_init_qc8_conv_minmax_fp32_avx2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
32742 
// Single 3x8 case at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
}
32756 
// Single 3x8 case at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
}
32770 
// Single 3x8 case at k = 8 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(8).sr(1)
      .m(3).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
}
32784 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
32785 
32786 
32787 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full-tile case: m = 2, n = 16, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
32800 
// Single 2x16 case at k = 8 with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(8)
      .cn_stride(19)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
32814 
// k fixed at 8; runs every (m, n) pair with n in 1..16 and m in 1..2,
// one iteration per pair.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
32832 
// k = 8 and n = 16; m takes each value in 1..2, one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(rows).n(16).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
32848 
// k = 8 and m = 2; n takes each value in 1..16, one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
32864 
// Full 2x16 output with k taking each value from 1 up to (not including) 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(16).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
32879 
// k in 1..7 combined with every (m, n) pair, n in 1..16 and m in 1..2;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
32899 
// Full 2x16 output with k taking each value from 9 up to (not including) 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(16).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
32914 
// k in 9..15 combined with every (m, n) pair, n in 1..16 and m in 1..2;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
32934 
// Full 2x16 output with k stepping from 16 to 80 in increments of 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(16).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
32949 
// k stepping 16..80 by 8, combined with every (m, n) pair, n in 1..16 and
// m in 1..2; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
32969 
// m = 2 with n in 17..31; for each n, k steps from 1 to 40 in increments of 9.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
32986 
// m = 2 with n in 17..31 and k stepping 1..40 by 9, with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(19)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33004 
// n in 17..31, k stepping 1..40 by 9, and m in 1..2; one iteration per
// combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33024 
// m = 2 with n = 32 and n = 48; for each n, k steps from 1 to 40 by 9.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33041 
// m = 2 with n = 32 and n = 48, k stepping 1..40 by 9, cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(19)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33059 
// n = 32 and n = 48, k stepping 1..40 by 9, and m in 1..2; one iteration
// per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33079 
// Full 2x16 output with ks set to 3; k steps from 1 to 40 in increments of 9.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(16).k(depth)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
33095 
// ks = 3 with k stepping 1..40 by 9, combined with every (m, n) pair,
// n in 1..16 and m in 1..2; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33116 
// ks = 3 and m = 2, with n in 17..31 and k stepping 1..40 by 9.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33134 
// ks = 3 and m = 2, with n = 32 and n = 48 and k stepping 1..40 by 9.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33152 
// cm_stride = 19 with k stepping 1..40 by 9, combined with every (m, n)
// pair, n in 1..16 and m in 1..2; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
                xnn_init_qc8_conv_minmax_fp32_avx512_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33173 
// Full 2x16 output with ks = 3 and a_offset = 83; k steps 1..40 by 9.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(8).sr(1)
        .m(2).n(16).k(depth)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
  }
}
33190 
// ks = 3 and a_offset = 83 as in a_offset, additionally setting zero_index
// to each of 0 and 1; k steps 1..40 by 9.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(8).sr(1)
          .m(2).n(16).k(depth)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
              xnn_init_qc8_conv_minmax_fp32_avx512_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33210 
// Single 2x16 case at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
33224 
// Single 2x16 case at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
33238 
// Single 2x16 case at k = 8 with cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_2X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(8).sr(1)
      .m(2).n(16).k(8)
      .cm_stride(19)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
}
33252 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
33253 
33254 
33255 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single full-tile case: m = 1, n = 4, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
33267 
// Full 1x4 tile with k = 8 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
33280 
// k = 8 with every (m, n) sub-tile size up to the 1x4 tile; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
33297 
// k = 8, n = 4, sweeping m over 1..mr (a single value here since mr = 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
33312 
// k = 8, m = 1, sweeping n over 1..4.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
33327 
// Full 1x4 tile with every k in 1..7 (below the k = 8 baseline).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
33341 
// Every k in 1..7 combined with every (m, n) sub-tile; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33360 
// Full 1x4 tile with every k in 9..15 (above the k = 8 baseline).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
33374 
// Every k in 9..15 combined with every (m, n) sub-tile; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33393 
// Full 1x4 tile with k taking multiples of 8 from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
33407 
// Multiples of 8 for k (16..80) combined with every (m, n) sub-tile.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33426 
// n in 5..7 (wider than nr = 4) with k stepping 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
33442 
// n in 5..7 with k stepping 1, 10, 19, 28, 37 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
33459 
// n in 5..7, k stepping 1, 10, 19, 28, 37, and m swept over 1..mr.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33478 
// n = 8 and 12 (multiples of nr = 4) with k stepping 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
33494 
// n = 8 and 12 with k stepping 1, 10, 19, 28, 37 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
33511 
// n = 8 and 12, k stepping 1, 10, 19, 28, 37, and m swept over 1..mr.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33530 
// Full 1x4 tile with ks = 3 and k stepping 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
33545 
// ks = 3 with k stepping 1, 10, 19, 28, 37 across every (m, n) sub-tile.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33565 
// ks = 3 with n in 5..7 and k stepping 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
33582 
// ks = 3 with n = 8 and 12 and k stepping 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
33599 
// cm_stride = 7 with k stepping 1, 10, 19, 28, 37 across every (m, n) sub-tile.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33619 
// Full 1x4 tile with ks = 3, a_offset = 43, and k stepping 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(8)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
33635 
// Same setup as the a_offset case (ks = 3, a_offset = 43), additionally
// sweeping zero_index over 0..mr-1 (only index 0 here since mr = 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(8)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
33654 
// Full 1x4 tile with k = 8 and the tester's qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
33667 
// Full 1x4 tile with k = 8 and the tester's qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
33680 
// Full 1x4 tile with k = 8 and cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(8)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
33693 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
33694 
33695 
33696 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Baseline case: full 2x4 tile with k = 8 for the 2x4c2 dot16x2 ld128 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
33708 
// Full 2x4 tile with k = 8 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
33721 
// k = 8 with every (m, n) sub-tile size up to the 2x4 tile; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
33738 
// k = 8, n = 4, sweeping m over 1..2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
33753 
// k = 8, m = 2, sweeping n over 1..4.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
33768 
// Full 2x4 tile with every k in 1..7 (below the k = 8 baseline).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
33782 
// Every k in 1..7 combined with every (m, n) sub-tile; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33801 
// Full 2x4 tile with every k in 9..15 (above the k = 8 baseline).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
33815 
// Every k in 9..15 combined with every (m, n) sub-tile; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33834 
// Full 2x4 tile with k taking multiples of 8 from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
33848 
// Multiples of 8 for k (16..80) combined with every (m, n) sub-tile.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33867 
// n in 5..7 (wider than nr = 4) with k stepping 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
33883 
// n in 5..7 with k stepping 1, 10, 19, 28, 37 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
33900 
// n in 5..7, k stepping 1, 10, 19, 28, 37, and m swept over 1..2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33919 
// n = 8 and 12 (multiples of nr = 4) with k stepping 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
33935 
// n = 8 and 12 with k stepping 1, 10, 19, 28, 37 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
33952 
// n = 8 and 12, k stepping 1, 10, 19, 28, 37, and m swept over 1..2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33971 
// Full 2x4 tile with ks = 3 and k stepping 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
33986 
// ks = 3 with k stepping 1, 10, 19, 28, 37 across every (m, n) sub-tile.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34006 
// ks = 3 with n in 5..7 and k stepping 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
34023 
// ks = 3 with n = 8 and 12 and k stepping 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
34040 
// cm_stride = 7 with k stepping 1, 10, 19, 28, 37 across every (m, n) sub-tile.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
34060 
// Full 2x4 tile with ks = 3, a_offset = 83, and k stepping 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
34076 
// For k = 1..40 (step 9), runs zero_index(0) and zero_index(1) with
// m(2), n(4), ks(3) and a_offset(83).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_mz = 0; cur_mz < 2; ++cur_mz) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(cur_k)
        .ks(3)
        .a_offset(83)
        .zero_index(cur_mz)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34095 
// Single run with m(2), n(4), k(8) and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34108 
// Single run with m(2), n(4), k(8) and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34121 
// Single run with m(2), n(4), k(8) and cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34134 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
34135 
34136 
34137 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m(3), n(4) and k(8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34149 
// Single run with m(3), n(4), k(8) and cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34162 
// Runs every n in 1..4 and m in 1..3 at k(8) with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34179 
// Sweeps m over 1..3 with n(4), k(8) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(cur_m).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34194 
// Sweeps n over 1..4 with m(3), k(8) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(cur_n).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34209 
// Sweeps k over 1..7 with m(3) and n(4).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(cur_k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34223 
// For k = 1..7, runs every n in 1..4 and m in 1..3 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34242 
// Sweeps k over 9..15 with m(3) and n(4).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(cur_k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34256 
// For k = 9..15, runs every n in 1..4 and m in 1..3 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34275 
// Sweeps k over 16..80 in steps of 8 with m(3) and n(4).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(cur_k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34289 
// For k = 16..80 (step 8), runs every n in 1..4 and m in 1..3 with
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34308 
// Sweeps n over 5..7 and k over 1..40 in steps of 9, with m(3).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34324 
// Sweeps n over 5..7 and k over 1..40 in steps of 9, with m(3) and
// cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34341 
// For n = 5..7 and k = 1..40 (step 9), runs every m in 1..3 with
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34360 
// Sweeps n over {8, 12} and k over 1..40 in steps of 9, with m(3).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34376 
// Sweeps n over {8, 12} and k over 1..40 in steps of 9, with m(3) and
// cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34393 
// For n in {8, 12} and k = 1..40 (step 9), runs every m in 1..3 with
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34412 
// Sweeps k over 1..40 in steps of 9 with m(3), n(4) and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(cur_k)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34427 
// For k = 1..40 (step 9), runs every n in 1..4 and m in 1..3 with ks(3)
// and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34447 
// Sweeps n over 5..7 and k over 1..40 in steps of 9, with m(3) and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34464 
// Sweeps n over {8, 12} and k over 1..40 in steps of 9, with m(3) and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34481 
// For k = 1..40 (step 9), runs every n in 1..4 and m in 1..3 with
// cm_stride(7) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34501 
// Sweeps k over 1..40 in steps of 9 with m(3), n(4), ks(3) and a_offset(127).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(cur_k)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34517 
// For k = 1..40 (step 9), runs zero_index(0), zero_index(1) and
// zero_index(2) with m(3), n(4), ks(3) and a_offset(127).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_mz = 0; cur_mz < 3; ++cur_mz) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(cur_k)
        .ks(3)
        .a_offset(127)
        .zero_index(cur_mz)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34536 
// Single run with m(3), n(4), k(8) and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34549 
// Single run with m(3), n(4), k(8) and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34562 
// Single run with m(3), n(4), k(8) and cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34575 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
34576 
34577 
34578 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m(4), n(4) and k(8).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34590 
// Single run with m(4), n(4), k(8) and cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34603 
// Runs every n in 1..4 and m in 1..4 at k(8) with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34620 
// Sweeps m over 1..4 with n(4), k(8) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(cur_m).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34635 
// Sweeps n over 1..4 with m(4), k(8) and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(cur_n).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34650 
// Sweeps k over 1..7 with m(4) and n(4).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(cur_k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34664 
// For k = 1..7, runs every n in 1..4 and m in 1..4 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34683 
// Sweeps k over 9..15 with m(4) and n(4).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(cur_k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34697 
// For k = 9..15, runs every n in 1..4 and m in 1..4 with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34716 
// Sweeps k over 16..80 in steps of 8 with m(4) and n(4).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(cur_k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34730 
// For k = 16..80 (step 8), runs every n in 1..4 and m in 1..4 with
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34749 
// Sweeps n over 5..7 and k over 1..40 in steps of 9, with m(4).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34765 
// Sweeps n over 5..7 and k over 1..40 in steps of 9, with m(4) and
// cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34782 
// For n = 5..7 and k = 1..40 (step 9), runs every m in 1..4 with
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34801 
// Sweeps n over {8, 12} and k over 1..40 in steps of 9, with m(4).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34817 
// Sweeps n over {8, 12} and k over 1..40 in steps of 9, with m(4) and
// cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34834 
// For n in {8, 12} and k = 1..40 (step 9), runs every m in 1..4 with
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34853 
// Sweeps k over 1..40 in steps of 9 with m(4), n(4) and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(cur_k)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34868 
// For k = 1..40 (step 9), runs every n in 1..4 and m in 1..4 with ks(3)
// and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34888 
// ks(3) with n in {5, 6, 7}, m = 4, k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34905 
// ks(3) with n in {8, 12}, m = 4, k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34922 
// cm_stride(7) with every (m, n) in [1, 4] x [1, 4], k in {1, 10, 19, 28, 37}; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34942 
// Full 4x4 tile with ks(3) and a_offset(163), k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(depth)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
  }
}
34958 
// ks(3), a_offset(163) and zero_index in [0, 3], k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(depth)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_row)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34977 
// Full 4x4 tile, k = 8, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
34990 
// Full 4x4 tile, k = 8, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
35003 
// Full 4x4 tile, k = 8, with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
}
35016 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
35017 
35018 
35019 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 4x2 tile with k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
35031 
// Full 4x2 tile, k = 1, with cn_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
35044 
// k = 1 with every (m, n) in [1, 4] x [1, 2]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(1)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35061 
// k = 1, n = 2, with every m in [1, 4]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(rows).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
35076 
// k = 1, m = 4, with every n in [1, 2]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(cols).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
35091 
// Full 4x2 tile with every k in [2, 9].
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
35105 
// Every k in [2, 9] with every (m, n) in [1, 4] x [1, 2]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35124 
// n = 3 (the only value in [3, 4)), m = 4, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35140 
// n = 3 (the only value in [3, 4)), m = 4, k in {1, 3, 5}, with cn_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(5)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35157 
// n = 3 (the only value in [3, 4)), k in {1, 3, 5}, every m in [1, 4]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35176 
// n in {4, 6}, m = 4, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35192 
// n in {4, 6}, m = 4, k in {1, 3, 5}, with cn_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(5)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35209 
// n in {4, 6}, k in {1, 3, 5}, every m in [1, 4]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35228 
// Full 4x2 tile with ks(3), k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, small_kernel) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(depth)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
35243 
// ks(3) with every (m, n) in [1, 4] x [1, 2], k in {1, 3, 5}; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35263 
// ks(3) with n = 3 (the only value in [3, 4)), m = 4, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35280 
// ks(3) with n in {4, 6}, m = 4, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35297 
// cm_stride(5) with every (m, n) in [1, 4] x [1, 2], k in {1, 3, 5}; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(5)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
                xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35317 
// Full 4x2 tile with ks(3) and a_offset(23), k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, a_offset) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(depth)
        .ks(3)
        .a_offset(23)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
35333 
// ks(3), a_offset(23) and zero_index in [0, 3], k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, zero) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(2).k(depth)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_row)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
              xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35352 
// Full 4x2 tile, k = 1, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
35365 
// Full 4x2 tile, k = 1, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
35378 
// Full 4x2 tile, k = 1, with cm_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cm_stride(5)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
35391 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
35392 
35393 
// Full 1x2 tile with k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
35405 
// Full 1x2 tile, k = 1, with cn_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
35418 
// k = 1 with every n in [1, 2] and m = 1 (the only value in [1, 1]); one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(1)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35435 
// k = 1, n = 2, m = 1 (the only value in [1, 1]); one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(rows).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
35450 
// k = 1, m = 1, with every n in [1, 2]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cols).k(1)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
35465 
// Full 1x2 tile with every k in [2, 9].
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
35479 
// Every k in [2, 9], every n in [1, 2], m = 1 (the only value in [1, 1]); one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35498 
// n = 3 (the only value in [3, 4)), m = 1, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35514 
// n = 3 (the only value in [3, 4)), m = 1, k in {1, 3, 5}, with cn_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(5)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35531 
// n = 3 (the only value in [3, 4)), k in {1, 3, 5}, m = 1 (the only value in [1, 1]); one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35550 
// n in {4, 6}, m = 1, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35566 
// n in {4, 6}, m = 1, k in {1, 3, 5}, with cn_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(5)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35583 
// n in {4, 6}, k in {1, 3, 5}, m = 1 (the only value in [1, 1]); one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35602 
// Full 1x2 tile with ks(3), k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
35617 
// ks(3) with every n in [1, 2], m = 1 (the only value in [1, 1]), k in {1, 3, 5}; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35637 
// ks(3) with n = 3 (the only value in [3, 4)), m = 1, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35654 
// ks(3) with n in {4, 6}, m = 1, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35671 
// cm_stride(5) with every n in [1, 2], m = 1 (the only value in [1, 1]), k in {1, 3, 5}; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(5)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
                xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
35691 
// Full 1x2 tile with ks(3) and a_offset(7), k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, a_offset) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .ks(3)
        .a_offset(7)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
35707 
// ks(3), a_offset(7) and zero_index = 0 (the only value in [0, 1)), k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, zero) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(2).k(depth)
          .ks(3)
          .a_offset(7)
          .zero_index(zero_row)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
              xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
35726 
// Full 1x2 tile, k = 1, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
35739 
// 1x2 scalar-lrintf kernel on a full 1x2 tile with k = 1 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
35752 
// 1x2 scalar-lrintf kernel on a full 1x2 tile with k = 1 and cm_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(1)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
35765 
35766 
// 1x4 scalar-imagic kernel on a full 1x4 tile with k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
35778 
// 1x4 scalar-imagic kernel on a full 1x4 tile with k = 1 and cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
35791 
// 1x4 scalar-imagic kernel with k = 1: sweeps n over 1..4; the m loop covers
// only m = 1. Each case uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
35808 
// 1x4 scalar-imagic kernel with k = 1 and n = 4; the m loop covers only
// m = 1. Uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
35823 
// 1x4 scalar-imagic kernel with k = 1 and m = 1: sweeps n over 1..4, with
// iterations(1) per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
35838 
// 1x4 scalar-imagic kernel on a full 1x4 tile for every k in 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
35852 
// 1x4 scalar-imagic kernel: sweeps k over 2..9 and n over 1..4; the m loop
// covers only m = 1. Each case uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35871 
// 1x4 scalar-imagic kernel with m = 1: sweeps n over 5..7 (above nr = 4) and
// k over {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
35887 
// 1x4 scalar-imagic kernel with m = 1 and cn_stride(7): sweeps n over 5..7
// (above nr = 4) and k over {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
35904 
// 1x4 scalar-imagic kernel: sweeps n over 5..7 (above nr = 4) and k over
// {1, 3, 5}; the m loop covers only m = 1. Each case uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35923 
// 1x4 scalar-imagic kernel with m = 1: n takes the multiples of 4 in
// {8, 12} and k sweeps {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
35939 
// 1x4 scalar-imagic kernel with m = 1 and cn_stride(7): n takes the
// multiples of 4 in {8, 12} and k sweeps {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
35956 
// 1x4 scalar-imagic kernel: n takes the multiples of 4 in {8, 12} and k
// sweeps {1, 3, 5}; the m loop covers only m = 1. Each case uses
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35975 
// 1x4 scalar-imagic kernel on a full 1x4 tile with ks(3), for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
35990 
// 1x4 scalar-imagic kernel with ks(3): sweeps k over {1, 3, 5} and n over
// 1..4; the m loop covers only m = 1. Each case uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36010 
// 1x4 scalar-imagic kernel with m = 1 and ks(3): sweeps n over 5..7 (above
// nr = 4) and k over {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
36027 
// 1x4 scalar-imagic kernel with m = 1 and ks(3): n takes the multiples of 4
// in {8, 12} and k sweeps {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
36044 
// 1x4 scalar-imagic kernel with cm_stride(7): sweeps k over {1, 3, 5} and
// n over 1..4; the m loop covers only m = 1. Each case uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36064 
// 1x4 scalar-imagic kernel on a full 1x4 tile with ks(3) and a_offset(7),
// for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
36080 
// 1x4 scalar-imagic kernel on a full 1x4 tile with ks(3), a_offset(7) and
// zero_index(mz), for k in {1, 3, 5}; the mz loop covers only mz = 0.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(7)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
36099 
// 1x4 scalar-imagic kernel on a full 1x4 tile with k = 1 and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
36112 
// 1x4 scalar-imagic kernel on a full 1x4 tile with k = 1 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
36125 
// 1x4 scalar-imagic kernel on a full 1x4 tile with k = 1 and cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
36138 
36139 
// 1x4 scalar-lrintf kernel on a full 1x4 tile with k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
36151 
// 1x4 scalar-lrintf kernel on a full 1x4 tile with k = 1 and cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
36164 
// 1x4 scalar-lrintf kernel with k = 1: sweeps n over 1..4; the m loop covers
// only m = 1. Each case uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
36181 
// 1x4 scalar-lrintf kernel with k = 1 and n = 4; the m loop covers only
// m = 1. Uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
36196 
// 1x4 scalar-lrintf kernel with k = 1 and m = 1: sweeps n over 1..4, with
// iterations(1) per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
36211 
// 1x4 scalar-lrintf kernel on a full 1x4 tile for every k in 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
36225 
// 1x4 scalar-lrintf kernel: sweeps k over 2..9 and n over 1..4; the m loop
// covers only m = 1. Each case uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36244 
// 1x4 scalar-lrintf kernel with m = 1: sweeps n over 5..7 (above nr = 4) and
// k over {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
36260 
// 1x4 scalar-lrintf kernel with m = 1 and cn_stride(7): sweeps n over 5..7
// (above nr = 4) and k over {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
36277 
// 1x4 scalar-lrintf kernel: sweeps n over 5..7 (above nr = 4) and k over
// {1, 3, 5}; the m loop covers only m = 1. Each case uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36296 
// 1x4 scalar-lrintf kernel with m = 1: n takes the multiples of 4 in
// {8, 12} and k sweeps {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
36312 
// 1x4 scalar-lrintf kernel with m = 1 and cn_stride(7): n takes the
// multiples of 4 in {8, 12} and k sweeps {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
36329 
// 1x4 scalar-lrintf kernel: n takes the multiples of 4 in {8, 12} and k
// sweeps {1, 3, 5}; the m loop covers only m = 1. Each case uses
// iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36348 
// 1x4 scalar-lrintf kernel on a full 1x4 tile with ks(3), for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
36363 
// 1x4 scalar-lrintf kernel with ks(3): sweeps k over {1, 3, 5} and n over
// 1..4; the m loop covers only m = 1. Each case uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36383 
// 1x4 scalar-lrintf kernel with m = 1 and ks(3): sweeps n over 5..7 (above
// nr = 4) and k over {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
36400 
// 1x4 scalar-lrintf kernel with m = 1 and ks(3): n takes the multiples of 4
// in {8, 12} and k sweeps {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
36417 
// 1x4 scalar-lrintf kernel with cm_stride(7): sweeps k over {1, 3, 5} and
// n over 1..4; the m loop covers only m = 1. Each case uses iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36437 
// 1x4 scalar-lrintf kernel on a full 1x4 tile with ks(3) and a_offset(7),
// for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
36453 
// 1x4 scalar-lrintf kernel on a full 1x4 tile with ks(3), a_offset(7) and
// zero_index(mz), for k in {1, 3, 5}; the mz loop covers only mz = 0.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(7)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
36472 
// 1x4 scalar-lrintf kernel on a full 1x4 tile with k = 1 and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
36485 
// 1x4 scalar-lrintf kernel on a full 1x4 tile with k = 1 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
36498 
// 1x4 scalar-lrintf kernel on a full 1x4 tile with k = 1 and cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
36511 
36512 
// 2x2 scalar-fmagic kernel on a full 2x2 tile with k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(2)
    .n(2)
    .k(1)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
36524 
// 2x2 scalar-fmagic kernel on a full 2x2 tile with k = 1 and cn_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(2)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(2)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
36537 
// 2x2 scalar-fmagic kernel with k = 1: every (m, n) pair with m and n in
// {1, 2}, using iterations(1) per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
36554 
// 2x2 scalar-fmagic kernel with k = 1 and n = 2: sweeps m over {1, 2}, with
// iterations(1) per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
36569 
// 2x2 scalar-fmagic kernel with k = 1 and m = 2: sweeps n over {1, 2}, with
// iterations(1) per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(2)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
36584 
// 2x2 scalar-fmagic kernel on a full 2x2 tile for every k in 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(2)
      .n(2)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
36598 
// 2x2 scalar-fmagic kernel: sweeps k over 2..9 and every (m, n) pair with m
// and n in {1, 2}, using iterations(1) per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36617 
// 2x2 scalar-fmagic kernel with m = 2: the n loop covers only n = 3 (above
// nr = 2); k sweeps {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
36633 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  // n takes the single value 3; k takes 1, 3, 5; m is fixed at 2; cn_stride is 5.
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(5)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36650 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  // n takes the single value 3; k takes 1, 3, 5; m sweeps 1..2; one iteration per case.
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36669 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2) {
  // n takes 4 and 6; k takes 1, 3, 5; m is fixed at 2.
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36685 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  // n takes 4 and 6; k takes 1, 3, 5; m is fixed at 2; cn_stride is 5.
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(5)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36702 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_subtile) {
  // n takes 4 and 6; k takes 1, 3, 5; m sweeps 1..2; one iteration per case.
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36721 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, small_kernel) {
  // k takes 1, 3, 5 with m = 2, n = 2 and ks = 3.
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_val)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36736 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, small_kernel_subtile) {
  // k takes 1, 3, 5; n sweeps 1..2; m sweeps 1..2; ks = 3; one iteration per case.
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36756 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  // n takes the single value 3; k takes 1, 3, 5; m is fixed at 2; ks = 3.
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36773 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  // n takes 4 and 6; k takes 1, 3, 5; m is fixed at 2; ks = 3.
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36790 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm_subtile) {
  // k takes 1, 3, 5; n sweeps 1..2; m sweeps 1..2; cm_stride = 5; one iteration per case.
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36810 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, a_offset) {
  // k takes 1, 3, 5 with m = 2, n = 2, ks = 3 and a_offset = 13.
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(k_val)
      .ks(3)
      .a_offset(13)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36826 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, zero) {
  // k takes 1, 3, 5; zero_index sweeps 0..1; m = 2, n = 2, ks = 3, a_offset = 13.
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t mz_val = 0; mz_val < 2; ++mz_val) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(k_val)
        .ks(3)
        .a_offset(13)
        .zero_index(mz_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36845 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmin) {
  // Single case: m = 2, n = 2, k = 1 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
36858 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, qmax) {
  // Single case: m = 2, n = 2, k = 1 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
36871 
TEST(QC8_IGEMM_MINMAX_FP32_2X2__SCALAR_FMAGIC, strided_cm) {
  // Single case: m = 2, n = 2, k = 1 with cm_stride set to 5.
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .cm_stride(5)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
36884 
36885 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1) {
  // Single case: m = 2, n = 4, k = 1.
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
36897 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cn) {
  // Single case: m = 2, n = 4, k = 1 with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
36910 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  // n sweeps 1..4 and m sweeps 1..2 with k = 1; one iteration per case.
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(m_val).n(n_val).k(1)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36927 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  // m sweeps 1..2 while n stays at 4 and k at 1; one iteration per case.
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(m_val).n(4).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36942 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  // n sweeps 1..4 while m stays at 2 and k at 1; one iteration per case.
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(n_val).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36957 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1) {
  // Sweeps k over 2..9 with m = 2 and n = 4.
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36971 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  // Sweeps k over 2..9, n over 1..4 and m over 1..2; one iteration per case.
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36990 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4) {
  // n sweeps 5..7; k takes 1, 3, 5; m is fixed at 2.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37006 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  // n sweeps 5..7; k takes 1, 3, 5; m is fixed at 2; cn_stride is 7.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37023 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  // n sweeps 5..7; k takes 1, 3, 5; m sweeps 1..2; one iteration per case.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37042 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4) {
  // n takes 8 and 12; k takes 1, 3, 5; m is fixed at 2.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37058 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  // n takes 8 and 12; k takes 1, 3, 5; m is fixed at 2; cn_stride is 7.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37075 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_subtile) {
  // n takes 8 and 12; k takes 1, 3, 5; m sweeps 1..2; one iteration per case.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37094 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel) {
  // k takes 1, 3, 5 with m = 2, n = 4 and ks = 3.
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_val)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
37109 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel_subtile) {
  // k takes 1, 3, 5; n sweeps 1..4; m sweeps 1..2; ks = 3; one iteration per case.
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37129 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  // n sweeps 5..7; k takes 1, 3, 5; m is fixed at 2; ks = 3.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37146 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  // n takes 8 and 12; k takes 1, 3, 5; m is fixed at 2; ks = 3.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37163 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm_subtile) {
  // k takes 1, 3, 5; n sweeps 1..4; m sweeps 1..2; cm_stride = 7; one iteration per case.
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37183 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, a_offset) {
  // k takes 1, 3, 5 with m = 2, n = 4, ks = 3 and a_offset = 13.
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_val)
      .ks(3)
      .a_offset(13)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
37199 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, zero) {
  // k takes 1, 3, 5; zero_index sweeps 0..1; m = 2, n = 4, ks = 3, a_offset = 13.
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t mz_val = 0; mz_val < 2; ++mz_val) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_val)
        .ks(3)
        .a_offset(13)
        .zero_index(mz_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37218 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmin) {
  // Single case: m = 2, n = 4, k = 1 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
37231 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmax) {
  // Single case: m = 2, n = 4, k = 1 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
37244 
TEST(QC8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm) {
  // Single case: m = 2, n = 4, k = 1 with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
37257 
37258 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1) {
  // Single case: m = 3, n = 2, k = 1.
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
37270 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cn) {
  // Single case: m = 3, n = 2, k = 1 with cn_stride set to 5.
  GemmMicrokernelTester()
    .mr(3).nr(2).kr(1).sr(1)
    .m(3).n(2).k(1)
    .cn_stride(5)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
37283 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  // n sweeps 1..2 and m sweeps 1..3 with k = 1; one iteration per case.
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(m_val).n(n_val).k(1)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37300 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  // m sweeps 1..3 while n stays at 2 and k at 1; one iteration per case.
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(m_val).n(2).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
37315 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  // n sweeps 1..2 while m stays at 3 and k at 1; one iteration per case.
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(n_val).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
37330 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_gt_1) {
  // Sweeps k over 2..9 with m = 3 and n = 2.
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
37344 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  // Sweeps k over 2..9, n over 1..2 and m over 1..3; one iteration per case.
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37363 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2) {
  // n takes the single value 3; k takes 1, 3, 5; m is fixed at 3.
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37379 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  // n takes the single value 3; k takes 1, 3, 5; m is fixed at 3; cn_stride is 5.
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(5)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37396 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  // n takes the single value 3; k takes 1, 3, 5; m sweeps 1..3; one iteration per case.
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37415 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2) {
  // n takes 4 and 6; k takes 1, 3, 5; m is fixed at 3.
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37431 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  // n takes 4 and 6; k takes 1, 3, 5; m is fixed at 3; cn_stride is 5.
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(5)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37448 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_subtile) {
  // n takes 4 and 6; k takes 1, 3, 5; m sweeps 1..3; one iteration per case.
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37467 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, small_kernel) {
  // k takes 1, 3, 5 with m = 3, n = 2 and ks = 3.
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(k_val)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
37482 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, small_kernel_subtile) {
  // k takes 1, 3, 5; n sweeps 1..2; m sweeps 1..3; ks = 3; one iteration per case.
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37502 
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  // n takes the single value 3; k takes 1, 3, 5; m is fixed at 3; ks = 3.
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37519 
// With ks(3) and m = 3: n in {4, 6} (multiples of nr = 2); k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37536 
// With cm_stride(5): every (m, n) in 1..3 x 1..2 for k in {1, 3, 5},
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37556 
// Full 3x2 tile with ks(3) and a_offset(17); k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(3)
      .n(2)
      .k(k)
      .ks(3)
      .a_offset(17)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
37572 
// Full 3x2 tile with ks(3) and a_offset(17); zero_index takes each of
// 0, 1, 2 for every k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(2)
        .k(k)
        .ks(3)
        .a_offset(17)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37591 
// Full 3x2 tile at k = 1 with qmin(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
37604 
// Full 3x2 tile at k = 1 with qmax(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
37617 
// Full 3x2 tile at k = 1 with cm_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
37630 
37631 
// Baseline case: full 3x2 tile (m = 3, n = 2) at k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37643 
// Full 3x2 tile at k = 1 with cn_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37656 
// k = 1 over every (m, n) in 1..3 x 1..2, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37673 
// k = 1, n fixed at 2; m runs over 1..3, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37688 
// k = 1, m fixed at 3; n runs over 1..2, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(3)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37703 
// Full 3x2 tile for every k in 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(3)
      .n(2)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37717 
// Every (m, n) in 1..3 x 1..2 for every k in 2..9,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37736 
// m = 3; the n loop spans [3, 4), i.e. only n = 3 (one more than nr = 2);
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37752 
// With cn_stride(5) and m = 3: only n = 3 (loop range [3, 4)); k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37769 
// Only n = 3 (loop range [3, 4)); k in {1, 3, 5}; m runs over 1..3,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37788 
// m = 3; n in {4, 6} (multiples of nr = 2); k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37804 
// With cn_stride(5) and m = 3: n in {4, 6}; k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37821 
// n in {4, 6}; k in {1, 3, 5}; m runs over 1..3,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37840 
// Full 3x2 tile (m = 3, n = 2) with ks(3); k takes the odd values 1, 3, 5.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(3)
      .n(2)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37855 
// With ks(3): every (m, n) in 1..3 x 1..2 for k in {1, 3, 5},
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37875 
// With ks(3) and m = 3: the n loop spans [3, 4), i.e. only n = 3 (one more
// than nr = 2); k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37892 
// With ks(3) and m = 3: n in {4, 6} (multiples of nr = 2); k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37909 
// With cm_stride(5): every (m, n) in 1..3 x 1..2 for k in {1, 3, 5},
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37929 
// Full 3x2 tile with ks(3) and a_offset(17); k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(3)
      .n(2)
      .k(k)
      .ks(3)
      .a_offset(17)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37945 
// Full 3x2 tile with ks(3) and a_offset(17); zero_index takes each of
// 0, 1, 2 for every k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(2)
        .k(k)
        .ks(3)
        .a_offset(17)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37964 
// Full 3x2 tile at k = 1 with qmin(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37977 
// Full 3x2 tile at k = 1 with qmax(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37990 
// Full 3x2 tile at k = 1 with cm_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
38003 
38004 
// Baseline case: full 3x2 tile (m = 3, n = 2) at k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
38016 
// Full 3x2 tile at k = 1 with cn_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
38029 
// k = 1 over every (m, n) in 1..3 x 1..2, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
38046 
// k = 1, n fixed at 2; m runs over 1..3, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
38061 
// k = 1, m fixed at 3; n runs over 1..2, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(3)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
38076 
// Full 3x2 tile for every k in 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(3)
      .n(2)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
38090 
// Every (m, n) in 1..3 x 1..2 for every k in 2..9,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38109 
// m = 3; the n loop spans [3, 4), i.e. only n = 3 (one more than nr = 2);
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
38125 
// With cn_stride(5) and m = 3: only n = 3 (loop range [3, 4)); k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
38142 
// Only n = 3 (loop range [3, 4)); k in {1, 3, 5}; m runs over 1..3,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38161 
// m = 3; n in {4, 6} (multiples of nr = 2); k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
38177 
// With cn_stride(5) and m = 3: n in {4, 6}; k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
38194 
// n in {4, 6}; k in {1, 3, 5}; m runs over 1..3,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38213 
// Full 3x2 tile (m = 3, n = 2) with ks(3); k takes the odd values 1, 3, 5.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(3)
      .n(2)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
38228 
// With ks(3): every (m, n) in 1..3 x 1..2 for k in {1, 3, 5},
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38248 
// With ks(3) and m = 3: the n loop spans [3, 4), i.e. only n = 3 (one more
// than nr = 2); k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
38265 
// With ks(3) and m = 3: n in {4, 6} (multiples of nr = 2); k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
38282 
// With cm_stride(5): every (m, n) in 1..3 x 1..2 for k in {1, 3, 5},
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
38302 
// Full 3x2 tile with ks(3) and a_offset(17); k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(3)
      .n(2)
      .k(k)
      .ks(3)
      .a_offset(17)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
38318 
// Full 3x2 tile with ks(3) and a_offset(17); zero_index takes each of
// 0, 1, 2 for every k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(3)
        .n(2)
        .k(k)
        .ks(3)
        .a_offset(17)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
38337 
// Full 3x2 tile at k = 1 with qmin(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
38350 
// Full 3x2 tile at k = 1 with qmax(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
38363 
// Full 3x2 tile at k = 1 with cm_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(3)
    .n(2)
    .k(1)
    .cm_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
38376 
38377 
// Baseline case: full 4x2 tile (m = 4, n = 2) at k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
38389 
// Full 4x2 tile at k = 1 with cn_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
38402 
// Sweeps every (m, n) with m in [1, 4] and n in [1, 2] at k=1 for the 4x2
// scalar-lrintf kernel; the tester's iterations value is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
  // Settings shared by every shape are applied once, outside the loops.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).k(1).iterations(1);
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      tester.m(rows).n(cols).Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
38419 
// Sweeps m over [1, 4] with n fixed at 2 and k=1 for the 4x2 scalar-lrintf
// kernel; the tester's iterations value is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).n(2).k(1).iterations(1);
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    tester.m(rows).Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
38434 
// Sweeps n over [1, 2] with m fixed at 4 and k=1 for the 4x2 scalar-lrintf
// kernel; the tester's iterations value is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).m(4).k(1).iterations(1);
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    tester.n(cols).Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
38449 
// Sweeps k over [2, 9] on a full 4x2 tile for the 4x2 scalar-lrintf kernel.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).m(4).n(2);
  for (size_t depth = 2; depth < 10; ++depth) {
    tester.k(depth).Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
38463 
// Sweeps k over [2, 9] and every (m, n) with m in [1, 4], n in [1, 2] for the
// 4x2 scalar-lrintf kernel; the tester's iterations value is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).iterations(1);
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        tester.m(rows).n(cols).k(depth).Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
38482 
// Runs the 4x2 scalar-lrintf kernel with n in [3, 4) (i.e. n=3 only), m=4,
// and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).m(4);
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      tester.n(cols).k(depth).Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
38498 
// Runs the 4x2 scalar-lrintf kernel with n in [3, 4) (i.e. n=3 only), m=4,
// k in {1, 3, 5}, and the tester's cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).m(4).cn_stride(5);
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      tester.n(cols).k(depth).Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
38515 
// Runs the 4x2 scalar-lrintf kernel with n in [3, 4) (i.e. n=3 only),
// k in {1, 3, 5}, and m in [1, 4]; the tester's iterations value is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).iterations(1);
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        tester.m(rows).n(cols).k(depth).Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
38534 
// Runs the 4x2 scalar-lrintf kernel with n in {4, 6}, m=4, and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).m(4);
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      tester.n(cols).k(depth).Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
38550 
// Runs the 4x2 scalar-lrintf kernel with n in {4, 6}, m=4, k in {1, 3, 5},
// and the tester's cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).m(4).cn_stride(5);
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      tester.n(cols).k(depth).Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
38567 
// Runs the 4x2 scalar-lrintf kernel with n in {4, 6}, k in {1, 3, 5}, and
// m in [1, 4]; the tester's iterations value is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).iterations(1);
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        tester.m(rows).n(cols).k(depth).Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
38586 
// Runs the 4x2 scalar-lrintf kernel on a full 4x2 tile with k in {1, 3, 5}
// and the tester's ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).m(4).n(2).ks(3);
  for (size_t depth = 1; depth <= 5; depth += 2) {
    tester.k(depth).Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
38601 
// Runs the 4x2 scalar-lrintf kernel with ks=3 over k in {1, 3, 5} and every
// (m, n) with m in [1, 4], n in [1, 2]; the tester's iterations value is 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel_subtile) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).ks(3).iterations(1);
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        tester.m(rows).n(cols).k(depth).Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
38621 
// Runs the 4x2 scalar-lrintf kernel with ks=3, n in [3, 4) (i.e. n=3 only),
// m=4, and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).m(4).ks(3);
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      tester.n(cols).k(depth).Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
38638 
// Runs the 4x2 scalar-lrintf kernel with ks=3, n in {4, 6}, m=4, and
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).m(4).ks(3);
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      tester.n(cols).k(depth).Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
38655 
// Runs the 4x2 scalar-lrintf kernel with cm_stride=5 over k in {1, 3, 5} and
// every (m, n) with m in [1, 4], n in [1, 2]; the tester's iterations value is 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).cm_stride(5).iterations(1);
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        tester.m(rows).n(cols).k(depth).Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
38675 
// Runs the 4x2 scalar-lrintf kernel on a full 4x2 tile with ks=3 and the
// tester's a_offset set to 23, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, a_offset) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).m(4).n(2).ks(3).a_offset(23);
  for (size_t depth = 1; depth <= 5; depth += 2) {
    tester.k(depth).Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
  }
}
38691 
// Runs the 4x2 scalar-lrintf kernel on a full 4x2 tile with ks=3 and
// a_offset=23, for k in {1, 3, 5} and the tester's zero_index in [0, 3].
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, zero) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).m(4).n(2).ks(3).a_offset(23);
  for (size_t depth = 1; depth <= 5; depth += 2) {
    tester.k(depth);
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      tester.zero_index(zero_row).Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
38710 
// Runs the 4x2 scalar-lrintf QC8 IGEMM microkernel on a full 4x2 tile at k=1
// with the tester's qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
    xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
38723 
// Runs the 4x2 scalar-lrintf QC8 IGEMM microkernel on a full 4x2 tile at k=1
// with the tester's qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
    xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
38736 
// Runs the 4x2 scalar-lrintf QC8 IGEMM microkernel on a full 4x2 tile at k=1
// with the tester's cm_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
    xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
    xnn_qs8_requantize_fp32);
}
38749 
38750 
// Runs the 4x4 scalar-fmagic QC8 IGEMM microkernel on a full 4x4 tile at k=1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
38762 
// Runs the 4x4 scalar-fmagic QC8 IGEMM microkernel on a full 4x4 tile at k=1
// with the tester's cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
38775 
// Sweeps every (m, n) with m in [1, 4] and n in [1, 4] at k=1 for the 4x4
// scalar-fmagic kernel; the tester's iterations value is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  // Settings shared by every shape are applied once, outside the loops.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).k(1).iterations(1);
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      tester.m(rows).n(cols).Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
38792 
// Sweeps m over [1, 4] with n fixed at 4 and k=1 for the 4x4 scalar-fmagic
// kernel; the tester's iterations value is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).n(4).k(1).iterations(1);
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    tester.m(rows).Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
38807 
// Sweeps n over [1, 4] with m fixed at 4 and k=1 for the 4x4 scalar-fmagic
// kernel; the tester's iterations value is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).m(4).k(1).iterations(1);
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    tester.n(cols).Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
38822 
// Sweeps k over [2, 9] on a full 4x4 tile for the 4x4 scalar-fmagic kernel.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_gt_1) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).m(4).n(4);
  for (size_t depth = 2; depth < 10; ++depth) {
    tester.k(depth).Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
38836 
// Sweeps k over [2, 9] and every (m, n) with m in [1, 4], n in [1, 4] for the
// 4x4 scalar-fmagic kernel; the tester's iterations value is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).iterations(1);
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        tester.m(rows).n(cols).k(depth).Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
38855 
// Runs the 4x4 scalar-fmagic kernel with n in [5, 7], m=4, and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).m(4);
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      tester.n(cols).k(depth).Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
38871 
// Runs the 4x4 scalar-fmagic kernel with n in [5, 7], m=4, k in {1, 3, 5},
// and the tester's cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).m(4).cn_stride(7);
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      tester.n(cols).k(depth).Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
38888 
// Runs the 4x4 scalar-fmagic kernel with n in [5, 7], k in {1, 3, 5}, and
// m in [1, 4]; the tester's iterations value is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).iterations(1);
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        tester.m(rows).n(cols).k(depth).Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
38907 
// Runs the 4x4 scalar-fmagic kernel with n in {8, 12}, m=4, and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).m(4);
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      tester.n(cols).k(depth).Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
38923 
// Runs the 4x4 scalar-fmagic kernel with n in {8, 12}, m=4, k in {1, 3, 5},
// and the tester's cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).m(4).cn_stride(7);
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      tester.n(cols).k(depth).Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
38940 
// Runs the 4x4 scalar-fmagic kernel with n in {8, 12}, k in {1, 3, 5}, and
// m in [1, 4]; the tester's iterations value is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_subtile) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).iterations(1);
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        tester.m(rows).n(cols).k(depth).Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
38959 
// Runs the 4x4 scalar-fmagic kernel on a full 4x4 tile with k in {1, 3, 5}
// and the tester's ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, small_kernel) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).m(4).n(4).ks(3);
  for (size_t depth = 1; depth <= 5; depth += 2) {
    tester.k(depth).Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
38974 
// Runs the 4x4 scalar-fmagic kernel with ks=3 over k in {1, 3, 5} and every
// (m, n) with m in [1, 4], n in [1, 4]; the tester's iterations value is 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, small_kernel_subtile) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).ks(3).iterations(1);
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        tester.m(rows).n(cols).k(depth).Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
38994 
// Runs the 4x4 scalar-fmagic kernel with ks=3, n in [5, 7], m=4, and
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).m(4).ks(3);
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      tester.n(cols).k(depth).Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
39011 
// Runs the 4x4 scalar-fmagic kernel with ks=3, n in {8, 12}, m=4, and
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).m(4).ks(3);
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      tester.n(cols).k(depth).Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
39028 
// Runs the 4x4 scalar-fmagic kernel with cm_stride=7 over k in {1, 3, 5} and
// every (m, n) with m in [1, 4], n in [1, 4]; the tester's iterations value is 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cm_subtile) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).cm_stride(7).iterations(1);
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        tester.m(rows).n(cols).k(depth).Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
39048 
// Runs the 4x4 scalar-fmagic kernel on a full 4x4 tile with ks=3 and the
// tester's a_offset set to 23, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, a_offset) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).m(4).n(4).ks(3).a_offset(23);
  for (size_t depth = 1; depth <= 5; depth += 2) {
    tester.k(depth).Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
  }
}
39064 
// Runs the 4x4 scalar-fmagic kernel on a full 4x4 tile with ks=3 and
// a_offset=23, for k in {1, 3, 5} and the tester's zero_index in [0, 3].
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, zero) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1).m(4).n(4).ks(3).a_offset(23);
  for (size_t depth = 1; depth <= 5; depth += 2) {
    tester.k(depth);
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      tester.zero_index(zero_row).Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
39083 
// Runs the 4x4 scalar-fmagic QC8 IGEMM microkernel on a full 4x4 tile at k=1
// with the tester's qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmin(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
39096 
// Runs the 4x4 scalar-fmagic QC8 IGEMM microkernel on a full 4x4 tile at k=1
// with the tester's qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmax(128);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
39109 
// Runs the 4x4 scalar-fmagic QC8 IGEMM microkernel on a full 4x4 tile at k=1
// with the tester's cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
    xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
    xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
    xnn_qs8_requantize_fp32);
}
39122 
39123 
39124 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
// Passes the 4x8 AArch32 NEONv8 mlal-lane ld64 QC8 IGEMM generator to the
// tester on a full 4x8 tile at k=8; guarded by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.Test(
    xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
39137 
// Passes the 4x8 AArch32 NEONv8 mlal-lane ld64 QC8 IGEMM generator to the
// tester on a full 4x8 tile at k=8 with cn_stride set to 11; guarded by
// TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
    xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
    xnn_qs8_requantize_fp32);
}
39151 
// Sweeps every (m, n) with m in [1, 4] and n in [1, 8] at k=8 for the 4x8
// NEONv8 mlal-lane ld64 generator; the tester's iterations value is set to 1.
// Guarded by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1).k(8).iterations(1);
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      tester.m(rows).n(cols).Test(
        xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
    }
  }
}
39169 
// Sweeps m over [1, 4] with n fixed at 8 and k=8 for the 4x8 NEONv8 mlal-lane
// ld64 generator; the tester's iterations value is set to 1.
// Guarded by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1).n(8).k(8).iterations(1);
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    tester.m(rows).Test(
      xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
39185 
// Sweeps n over [1, 8] with m fixed at 4 and k=8 for the 4x8 NEONv8 mlal-lane
// ld64 generator; the tester's iterations value is set to 1.
// Guarded by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1).m(4).k(8).iterations(1);
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    tester.n(cols).Test(
      xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
39201 
// Sweeps k over [1, 7] on a full 4x8 tile for the 4x8 NEONv8 mlal-lane ld64
// generator. Guarded by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8);
  for (size_t depth = 1; depth < 8; ++depth) {
    tester.k(depth).Test(
      xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
39216 
// Sweeps k over [1, 7] and every (m, n) with m in [1, 4], n in [1, 8] for the
// 4x8 NEONv8 mlal-lane ld64 generator; the tester's iterations value is 1.
// Guarded by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1).iterations(1);
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        tester.m(rows).n(cols).k(depth).Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
39236 
// Sweeps k over [9, 15] on a full 4x8 tile for the 4x8 NEONv8 mlal-lane ld64
// generator. Guarded by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1).m(4).n(8);
  for (size_t depth = 9; depth < 16; ++depth) {
    tester.k(depth).Test(
      xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
  }
}
39251 
// Sweeps k over [9, 15] and every (m, n) with m in [1, 4], n in [1, 8] for the
// 4x8 NEONv8 mlal-lane ld64 generator; the tester's iterations value is 1.
// Guarded by TEST_REQUIRES_ARM_NEON_V8.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1).iterations(1);
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        tester.m(rows).n(cols).k(depth).Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
      }
    }
  }
}
39271 
// Full tile (m=4, n=8) with k stepping through 16, 24, ..., 80.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc < 81; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39286 
// For k = 16, 24, ..., 80, exercises each (m, n) with m in 1..4 and n in 1..8,
// using a single iteration per configuration.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc < 81; kc += 8) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39306 
// m=4 with n in 9..15; for each n, k takes the values 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39323 
// m=4 with n in 9..15 and cn_stride set to 11; for each n, k takes the
// values 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39341 
// n in 9..15, k in {1, 10, 19, 28, 37}, and m in 1..4, with a single
// iteration per configuration.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39361 
// m=4 with n in {16, 24}; for each n, k takes the values 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc < 25; nc += 8) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39378 
// m=4 with n in {16, 24} and cn_stride set to 11; for each n, k takes the
// values 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc < 25; nc += 8) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39396 
// n in {16, 24}, k in {1, 10, 19, 28, 37}, and m in 1..4, with a single
// iteration per configuration.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc < 25; nc += 8) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39416 
// Full tile (m=4, n=8) with ks set to 3; k takes the values 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 41; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39432 
// ks set to 3; k in {1, 10, 19, 28, 37}, n in 1..8, m in 1..4, with a single
// iteration per configuration.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 41; kc += 9) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39453 
// m=4 with n in 9..15 and ks set to 3; for each n, k takes the values
// 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39471 
// m=4 with n in {16, 24} and ks set to 3; for each n, k takes the values
// 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc < 25; nc += 8) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39489 
// cm_stride set to 11; k in {1, 10, 19, 28, 37}, n in 1..8, m in 1..4, with a
// single iteration per configuration.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 41; kc += 9) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39510 
// Full tile (m=4, n=8) with ks set to 3 and a_offset set to 163; k takes the
// values 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 41; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39527 
// Full tile (m=4, n=8) with ks set to 3 and a_offset set to 163; for each k in
// {1, 10, 19, 28, 37} the zero_index is swept over 0..3.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 41; kc += 9) {
    for (uint32_t zi = 0; zi <= 3; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zi)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39547 
// Single full-tile run (m=4, n=8, k=8) with qmin set to 128.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
39561 
// Single full-tile run (m=4, n=8, k=8) with qmax set to 128.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
39575 
// Single full-tile run (m=4, n=8, k=8) with cm_stride set to 11.
TEST(GENERATE_QC8_IGEMM_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
39589 #endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
39590 
39591 
39592 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
// Single full-tile run (m=4, n=8, k=8) of the 4x8c4 neondot ld64 generator.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
39605 
// Single full-tile run (m=4, n=8, k=8) with cn_stride set to 11.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .cn_stride(11)
      .Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
39619 
// k fixed at 8; exercises each (m, n) with n in 1..8 and m in 1..4, using a
// single iteration per configuration.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc < 9; ++nc) {
    for (uint32_t mc = 1; mc < 5; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39637 
// n=8 and k=8 fixed; m is swept over 1..4 with a single iteration each.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t mc = 1; mc < 5; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(4).sr(1)
        .m(mc).n(8).k(8)
        .iterations(1)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39653 
// m=4 and k=8 fixed; n is swept over 1..8 with a single iteration each.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 1; nc < 9; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(4).sr(1)
        .m(4).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39669 
// Full tile (m=4, n=8) for every k in 1..7.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 7; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(kc)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39684 
// For every k in 1..7, exercises each (m, n) with m in 1..4 and n in 1..8,
// using a single iteration per configuration.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc <= 7; ++kc) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39704 
// Full tile (m=4, n=8) for every k in 9..15.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc <= 15; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(kc)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39719 
// For every k in 9..15, exercises each (m, n) with m in 1..4 and n in 1..8,
// using a single iteration per configuration.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 9; kc <= 15; ++kc) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39739 
// Full tile (m=4, n=8) with k stepping through 16, 24, ..., 80.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc < 81; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(kc)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39754 
// For k = 16, 24, ..., 80, exercises each (m, n) with m in 1..4 and n in 1..8,
// using a single iteration per configuration.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 16; kc < 81; kc += 8) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39774 
// m=4 with n in 9..15; for each n, k takes the values 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39791 
// m=4 with n in 9..15 and cn_stride set to 11; for each n, k takes the
// values 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39809 
// n in 9..15, k in {1, 10, 19, 28, 37}, and m in 1..4, with a single
// iteration per configuration.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39829 
// m=4 with n in {16, 24}; for each n, k takes the values 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc < 25; nc += 8) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39846 
// m=4 with n in {16, 24} and cn_stride set to 11; for each n, k takes the
// values 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc < 25; nc += 8) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39864 
// n in {16, 24}, k in {1, 10, 19, 28, 37}, and m in 1..4, with a single
// iteration per configuration.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc < 25; nc += 8) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39884 
// Full tile (m=4, n=8) with ks set to 3; k takes the values 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 41; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39900 
// ks set to 3; k in {1, 10, 19, 28, 37}, n in 1..8, m in 1..4, with a single
// iteration per configuration.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 41; kc += 9) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39921 
// m=4 with n in 9..15 and ks set to 3; for each n, k takes the values
// 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39939 
// m=4 with n in {16, 24} and ks set to 3; for each n, k takes the values
// 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t nc = 16; nc < 25; nc += 8) {
    for (size_t kc = 1; kc < 41; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(4).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
39957 
// cm_stride set to 11; k in {1, 10, 19, 28, 37}, n in 1..8, m in 1..4, with a
// single iteration per configuration.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 41; kc += 9) {
    for (uint32_t nc = 1; nc < 9; ++nc) {
      for (uint32_t mc = 1; mc < 5; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(4).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
39978 
// Full tile (m=4, n=8) with ks set to 3 and a_offset set to 163; k takes the
// values 1, 10, 19, 28, 37.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 41; kc += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
39995 
// Full tile (m=4, n=8) with ks set to 3 and a_offset set to 163; for each k in
// {1, 10, 19, 28, 37} the zero_index is swept over 0..3.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t kc = 1; kc < 41; kc += 9) {
    for (uint32_t zi = 0; zi <= 3; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(4).sr(1)
          .m(4).n(8).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zi)
          .Test(
              xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
40015 
// Single full-tile run (m=4, n=8, k=8) with qmin set to 128.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
40029 
// Single full-tile run (m=4, n=8, k=8) with qmax set to 128.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
40043 
// Single full-tile run (m=4, n=8, k=8) with cm_stride set to 11.
TEST(GENERATE_QC8_IGEMM_FP32_4X8C4__AARCH32_NEONDOT_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_generate_qc8_igemm_fp32_ukernel_4x8c4__aarch32_neondot_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
40057 #endif  // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
40058