1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/qc8-gemm-minmax-fp32.yaml
11 // Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single run with m == mr (1), n == nr (8) and k == 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
40
// m=1, n=8, k=8 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
54
// m=1, n=8, k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
68
// k fixed at 8; sweeps n over [1, 8] and m over [1, 1], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
86
// n=8, k=8; sweeps m over [1, 1], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
102
// m=1, k=8; sweeps n over [1, 8], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
118
// m=1, n=8; sweeps k over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
133
// m=1, n=8, a_stride=11; sweeps k over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
149
// Sweeps k over [1, 7], n over [1, 8] and m over [1, 1], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
169
// m=1, n=8; sweeps k over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
184
// m=1, n=8, a_stride=19; sweeps k over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
200
// Sweeps k over [9, 15], n over [1, 8] and m over [1, 1], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
220
// m=1, n=8; sweeps k over 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
235
// m=1, n=8, a_stride=83; sweeps k over 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
251
// Sweeps k over 16..80 (step 8), n over [1, 8] and m over [1, 1], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
271
// m=1; sweeps n over [9, 15] and k over 1..40 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
288
// m=1, cn_stride=11; sweeps n over [9, 15] and k over 1..40 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
306
// m=1, a_stride=43; sweeps n over [9, 15] and k over 1..40 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
324
// Sweeps n over [9, 15], k over 1..40 (step 9) and m over [1, 1], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
344
// m=1; sweeps n over {16, 24} and k over 1..40 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
361
// m=1, cn_stride=11; sweeps n over {16, 24} and k over 1..40 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
379
// m=1, a_stride=43; sweeps n over {16, 24} and k over 1..40 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
397
// Sweeps n over {16, 24}, k over 1..40 (step 9) and m over [1, 1], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
417
// cm_stride=11; sweeps k over 1..40 (step 9), n over [1, 8] and m over [1, 1], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
438
// m=1, n=8, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
452
// m=1, n=8, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
466
// m=1, n=8, k=8 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A7, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8__aarch32_neon_mlal_lane_prfm_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
480 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
481
482
483 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single run with m == mr (4), n == nr (8) and k == 8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
496
// m=4, n=8, k=8 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
510
// m=4, n=8, k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
524
// k fixed at 8; sweeps n over [1, 8] and m over [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
542
// n=8, k=8; sweeps m over [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
558
// m=4, k=8; sweeps n over [1, 8], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
574
// m=4, n=8; sweeps k over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
589
// m=4, n=8, a_stride=11; sweeps k over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
605
// Sweeps k over [1, 7], n over [1, 8] and m over [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
625
// m=4, n=8; sweeps k over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
640
// m=4, n=8, a_stride=19; sweeps k over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
656
// Sweeps k over [9, 15], n over [1, 8] and m over [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
676
// m=4, n=8; sweeps k over 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
691
// m=4, n=8, a_stride=83; sweeps k over 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
707
// Sweeps k over 16..80 (step 8), n over [1, 8] and m over [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
727
// m=4; sweeps n over [9, 15] and k over 1..40 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
744
// m=4, cn_stride=11; sweeps n over [9, 15] and k over 1..40 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
762
// m=4, a_stride=43; sweeps n over [9, 15] and k over 1..40 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
780
// Sweeps n over [9, 15], k over 1..40 (step 9) and m over [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
800
// m=4; sweeps n over {16, 24} and k over 1..40 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
817
// m=4, cn_stride=11; sweeps n over {16, 24} and k over 1..40 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
835
// m=4, a_stride=43; sweeps n over {16, 24} and k over 1..40 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
853
// Sweeps n over {16, 24}, k over 1..40 (step 9) and m over [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
873
// cm_stride=11; sweeps k over 1..40 (step 9), n over [1, 8] and m over [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
894
// m=4, n=8, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
908
// m=4, n=8, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
922
// m=4, n=8, k=8 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
936 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
937
938
939 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single run with m == mr (4), n == nr (8) and k == 8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
952
// m=4, n=8, k=8 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
966
// m=4, n=8, k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
980
// k fixed at 8; sweeps n over [1, 8] and m over [1, 4], one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
998
// Sweeps m over 1..4 with n=8 and k=8, iterations(1) per value.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(rows).n(8).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1014
// Sweeps n over 1..8 with m=4 and k=8, iterations(1) per value.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1030
// Sweeps k over 1..7 with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1045
// Sweeps k over 1..7 with m=4, n=8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1061
// Sweeps k over 1..7, n over 1..8 and m over 1..4 (nested in that order),
// with iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1081
// Sweeps k over 9..15 with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1096
// Sweeps k over 9..15 with m=4, n=8 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1112
// Sweeps k over 9..15, n over 1..8 and m over 1..4 (nested in that order),
// with iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1132
// Sweeps k over 16, 24, ..., 80 (step 8) with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1147
// Sweeps k over 16, 24, ..., 80 (step 8) with m=4, n=8 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1163
// Sweeps k over 16, 24, ..., 80 (step 8), n over 1..8 and m over 1..4
// (nested in that order), with iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1183
// Sweeps n over 9..15 (outer) and k over 1, 10, ..., 37 (step 9, inner) with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1200
// Sweeps n over 9..15 (outer) and k over 1, 10, ..., 37 (step 9, inner)
// with m=4 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1218
// Sweeps n over 9..15 (outer) and k over 1, 10, ..., 37 (step 9, inner)
// with m=4 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1236
// Sweeps n over 9..15, k over 1, 10, ..., 37 (step 9) and m over 1..4
// (nested in that order), with iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1256
// Sweeps n over 16 and 24 (outer) and k over 1, 10, ..., 37 (step 9, inner) with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1273
// Sweeps n over 16 and 24 (outer) and k over 1, 10, ..., 37 (step 9, inner)
// with m=4 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1291
// Sweeps n over 16 and 24 (outer) and k over 1, 10, ..., 37 (step 9, inner)
// with m=4 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1309
// Sweeps n over 16 and 24, k over 1, 10, ..., 37 (step 9) and m over 1..4
// (nested in that order), with iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1329
// Sweeps k over 1, 10, ..., 37 (step 9), n over 1..8 and m over 1..4
// (nested in that order) with cm_stride set to 11 and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1350
// Single run with m=4, n=8, k=8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1364
// Single run with m=4, n=8, k=8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1378
// Single run with m=4, n=8, k=8 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1392 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1393
1394
1395 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single run with m=4, n=8, k=8; no strides are set explicitly.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1408
// Single run with m=4, n=8, k=8 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cn_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1422
// Single run with m=4, n=8, k=8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1436
// Sweeps n over 1..8 (outer) and m over 1..4 (inner) at k=8,
// with iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1454
// Sweeps m over 1..4 with n=8 and k=8, iterations(1) per value.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(rows).n(8).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1470
// Sweeps n over 1..8 with m=4 and k=8, iterations(1) per value.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1486
// Sweeps k over 1..7 with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1501
// Sweeps k over 1..7 with m=4, n=8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1517
// Sweeps k over 1..7, n over 1..8 and m over 1..4 (nested in that order),
// with iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1537
// Sweeps k over 9..15 with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1552
// Sweeps k over 9..15 with m=4, n=8 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1568
// Sweeps k over 9..15, n over 1..8 and m over 1..4 (nested in that order),
// with iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1588
// Sweeps k over 16, 24, ..., 80 (step 8) with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1603
// Sweeps k over 16, 24, ..., 80 (step 8) with m=4, n=8 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1619
// Sweeps k over 16, 24, ..., 80 (step 8), n over 1..8 and m over 1..4
// (nested in that order), with iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1639
// Sweeps n over 9..15 (outer) and k over 1, 10, ..., 37 (step 9, inner) with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1656
// Sweeps n over 9..15 (outer) and k over 1, 10, ..., 37 (step 9, inner)
// with m=4 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1674
// Sweeps n over 9..15 (outer) and k over 1, 10, ..., 37 (step 9, inner)
// with m=4 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1692
// Sweeps n over 9..15, k over 1, 10, ..., 37 (step 9) and m over 1..4
// (nested in that order), with iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1712
// Sweeps n over 16 and 24 (outer) and k over 1, 10, ..., 37 (step 9, inner) with m=4.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1729
// Sweeps n over 16 and 24 (outer) and k over 1, 10, ..., 37 (step 9, inner)
// with m=4 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1747
// Sweeps n over 16 and 24 (outer) and k over 1, 10, ..., 37 (step 9, inner)
// with m=4 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1765
// Sweeps n over 16 and 24, k over 1, 10, ..., 37 (step 9) and m over 1..4
// (nested in that order), with iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1785
// Sweeps k over 1, 10, ..., 37 (step 9), n over 1..8 and m over 1..4
// (nested in that order) with cm_stride set to 11 and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1806
// Single run with m=4, n=8, k=8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1820
// Single run with m=4, n=8, k=8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1834
// Single run with m=4, n=8, k=8 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1848 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1849
1850
1851 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single run with m=4, n=8, k=8; no strides are set explicitly.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
1864
// Single run with m=4, n=8, k=8 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cn_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
1878
// Single run with m=4, n=8, k=8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
1892
// Sweeps n over 1..8 (outer) and m over 1..4 (inner) at k=8,
// with iterations(1) for each combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1910
// Sweeps m over 1..4 with n=8 and k=8, iterations(1) per value.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(rows).n(8).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1926
// Sweeps n over 1..8 with m=4 and k=8, iterations(1) per value.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1942
// Sweeps k over 1..7 with m=4 and n=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1957
// Sweeps k over 1..7 with m=4, n=8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
1973
// Every (m, n) shape with m in 1..4 and n in 1..8, for each k in 1..7;
// iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
1993
// Full 4x8 tile (m=4, n=8) for every reduction size k in 9..15.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2008
// Full 4x8 tile for every k in 9..15, with a_stride explicitly set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2024
// Every (m, n) shape with m in 1..4 and n in 1..8, for each k in 9..15;
// iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2044
// Full 4x8 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2059
// Full 4x8 tile for k = 16, 24, ..., 80, with a_stride explicitly set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2075
// Every (m, n) shape with m in 1..4 and n in 1..8, for k = 16, 24, ..., 80;
// iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2095
// m=4 with n in 9..15 (wider than nr=8), each at k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2112
// m=4 with n in 9..15, each at k = 1, 10, 19, 28, 37, with cn_stride
// explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2130
// m=4 with n in 9..15, each at k = 1, 10, 19, 28, 37, with a_stride
// explicitly set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2148
// n in 9..15 crossed with k = 1, 10, 19, 28, 37 and m in 1..4;
// iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2168
// m=4 with n = 16 and 24 (multiples of nr=8), each at k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2185
// m=4 with n = 16 and 24, each at k = 1, 10, 19, 28, 37, with cn_stride
// explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2203
// m=4 with n = 16 and 24, each at k = 1, 10, 19, 28, 37, with a_stride
// explicitly set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2221
// n = 16 and 24 crossed with k = 1, 10, 19, 28, 37 and m in 1..4;
// iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2241
// Every (m, n) shape with m in 1..4 and n in 1..8 at k = 1, 10, 19, 28, 37,
// with cm_stride explicitly set to 11; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2262
// Full 4x8 tile at k=8 with the tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2276
// Full 4x8 tile at k=8 with the tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2290
// Full 4x8 tile at k=8 with cm_stride explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A35, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a35, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2304 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
2305
2306
2307 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single full-tile case: m=4, n=8 (equal to mr x nr) with k=8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2320
// Full 4x8 tile at k=8 with cn_stride explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2334
// Full 4x8 tile at k=8 with a_stride explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2348
// Every (m, n) shape with m in 1..4 and n in 1..8 at k=8; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2366
// Sweeps the row count m over 1..4 at n=8, k=8, with iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2382
// Sweeps the column count n over 1..8 at m=4, k=8, with iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2398
// Full 4x8 tile (m=4, n=8) for every reduction size k in 1..7.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2413
// Full 4x8 tile for every k in 1..7, with a_stride explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2429
// Every (m, n) shape with m in 1..4 and n in 1..8, for each k in 1..7;
// iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2449
// Full 4x8 tile (m=4, n=8) for every reduction size k in 9..15.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2464
// Full 4x8 tile for every k in 9..15, with a_stride explicitly set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2480
// Every (m, n) shape with m in 1..4 and n in 1..8, for each k in 9..15;
// iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2500
// Full 4x8 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2515
// Full 4x8 tile for k = 16, 24, ..., 80, with a_stride explicitly set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2531
// Every (m, n) shape with m in 1..4 and n in 1..8, for k = 16, 24, ..., 80;
// iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2551
// m=4 with n in 9..15 (wider than nr=8), each at k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2568
// m=4 with n in 9..15, each at k = 1, 10, 19, 28, 37, with cn_stride
// explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2586
// m=4 with n in 9..15, each at k = 1, 10, 19, 28, 37, with a_stride
// explicitly set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2604
// n in 9..15 crossed with k = 1, 10, 19, 28, 37 and m in 1..4;
// iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2624
// m=4 with n = 16 and 24 (multiples of nr=8), each at k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2641
// m=4 with n = 16 and 24, each at k = 1, 10, 19, 28, 37, with cn_stride
// explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2659
// m=4 with n = 16 and 24, each at k = 1, 10, 19, 28, 37, with a_stride
// explicitly set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2677
// n = 16 and 24 crossed with k = 1, 10, 19, 28, 37 and m in 1..4;
// iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2697
// Every (m, n) shape with m in 1..4 and n in 1..8 at k = 1, 10, 19, 28, 37,
// with cm_stride explicitly set to 11; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2718
// Full 4x8 tile at k=8 with the tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2732
// Full 4x8 tile at k=8 with the tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2746
// Full 4x8 tile at k=8 with cm_stride explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2760 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
2761
2762
2763 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single full-tile case: m=1, n=8 (equal to mr x nr) with kr=8 and k=16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2776
// Full 1x8 tile at k=16 with cn_stride explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2790
// Full 1x8 tile at k=16 with a_stride explicitly set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .a_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
2804
// Every (m, n) shape with m in 1..1 and n in 1..8 at k=16; iterations(1) per
// shape. The m loop runs once because mr=1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
2822
// Sweeps m over 1..1 (a single pass, since mr=1) at n=8, k=16, with
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2838
// Sweeps the column count n over 1..8 at m=1, k=16, with iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2854
// Full 1x8 tile (m=1, n=8) for every reduction size k in 1..15.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2869
// Full 1x8 tile for every k in 1..15, with a_stride explicitly set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2885
// Every (m, n) shape with m in 1..1 and n in 1..8, for each k in 1..15;
// iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2905
// Full 1x8 tile (m=1, n=8) for every reduction size k in 17..31.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2920
// Runs the mlal kernel with m=1, n=8 for every k in [17, 31], with a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(37)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2936
// Sweeps k in [17, 31] and n in [1, 8] with one iteration per configuration.
// The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
2956
// Runs the mlal kernel with m=1, n=8 for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2971
// Runs the mlal kernel with m=1, n=8 for k = 32, 48, ..., 160, with a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(163)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
2987
// Sweeps k = 32, 48, ..., 160 and n in [1, 8] with one iteration per configuration.
// The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3007
// Runs the mlal kernel with m=1 for n in [9, 15] (above nr = 8) and k in {1, 18, 35, 52, 69}.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3024
// Same sweep as n_gt_8 (n in [9, 15], k in {1, 18, 35, 52, 69}) with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3042
// Same sweep as n_gt_8 (n in [9, 15], k in {1, 18, 35, 52, 69}) with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3060
// Sweeps n in [9, 15] and k in {1, 18, 35, 52, 69} with one iteration per configuration.
// The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3080
// Runs the mlal kernel with m=1 for n in {16, 24} (multiples of nr = 8) and k in {1, 18, 35, 52, 69}.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3097
// Same sweep as n_div_8 (n in {16, 24}, k in {1, 18, 35, 52, 69}) with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3115
// Same sweep as n_div_8 (n in {16, 24}, k in {1, 18, 35, 52, 69}) with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3133
// Sweeps n in {16, 24} and k in {1, 18, 35, 52, 69} with one iteration per configuration.
// The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3153
// Sweeps k in {1, 18, 35, 52, 69} and n in [1, 8] with cm_stride set to 11,
// one iteration per configuration. The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3174
// Runs the mlal kernel once with m=1, n=8, k=16 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3188
// Runs the mlal kernel once with m=1, n=8, k=16 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3202
// Runs the mlal kernel once with m=1, n=8, k=16 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .cm_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3216 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3217
3218
3219 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Runs the mlal_prfm kernel once with m=1, n=8, k=16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3232
// Runs the mlal_prfm kernel once with m=1, n=8, k=16 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3246
// Runs the mlal_prfm kernel once with m=1, n=8, k=16 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .a_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3260
// Sweeps n in [1, 8] at k=16 with one iteration per configuration.
// The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3278
// Sweeps m at n=8, k=16 with one iteration per configuration.
// The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3294
// Sweeps n in [1, 8] at m=1, k=16 with one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3310
// Runs the mlal_prfm kernel with m=1, n=8 for every k in [1, 15].
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3325
// Runs the mlal_prfm kernel with m=1, n=8 for every k in [1, 15], with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3341
// Sweeps k in [1, 15] and n in [1, 8] with one iteration per configuration.
// The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3361
// Runs the mlal_prfm kernel with m=1, n=8 for every k in [17, 31].
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3376
// Runs the mlal_prfm kernel with m=1, n=8 for every k in [17, 31], with a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(37)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3392
// Sweeps k in [17, 31] and n in [1, 8] with one iteration per configuration.
// The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3412
// Runs the mlal_prfm kernel with m=1, n=8 for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3427
// Runs the mlal_prfm kernel with m=1, n=8 for k = 32, 48, ..., 160, with a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(163)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3443
// Sweeps k = 32, 48, ..., 160 and n in [1, 8] with one iteration per configuration.
// The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3463
// Runs the mlal_prfm kernel with m=1 for n in [9, 15] (above nr = 8) and k in {1, 18, 35, 52, 69}.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3480
// Same sweep as n_gt_8 (n in [9, 15], k in {1, 18, 35, 52, 69}) with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3498
// Same sweep as n_gt_8 (n in [9, 15], k in {1, 18, 35, 52, 69}) with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3516
// Sweeps n in [9, 15] and k in {1, 18, 35, 52, 69} with one iteration per configuration.
// The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3536
// Runs the mlal_prfm kernel with m=1 for n in {16, 24} (multiples of nr = 8) and k in {1, 18, 35, 52, 69}.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3553
// Same sweep as n_div_8 (n in {16, 24}, k in {1, 18, 35, 52, 69}) with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3571
// Same sweep as n_div_8 (n in {16, 24}, k in {1, 18, 35, 52, 69}) with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3589
// Sweeps n in {16, 24} and k in {1, 18, 35, 52, 69} with one iteration per configuration.
// The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3609
// Sweeps k in {1, 18, 35, 52, 69} and n in [1, 8] with cm_stride set to 11,
// one iteration per configuration. The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3630
// Runs the mlal_prfm kernel once with m=1, n=8, k=16 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3644
// Runs the mlal_prfm kernel once with m=1, n=8, k=16 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3658
// Runs the mlal_prfm kernel once with m=1, n=8, k=16 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .cm_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3672 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3673
3674
3675 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Runs the mlal_prfm_cortex_a53 kernel once with m=1, n=8, k=16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3688
// Runs the mlal_prfm_cortex_a53 kernel once with m=1, n=8, k=16 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3702
// Runs the mlal_prfm_cortex_a53 kernel once with m=1, n=8, k=16 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(8)
    .sr(1)
    .m(1)
    .n(8)
    .k(16)
    .a_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
3716
// Sweeps n in [1, 8] at k=16 with one iteration per configuration.
// The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
3734
// Sweeps m at n=8, k=16 with one iteration per configuration.
// The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3750
// Sweeps n in [1, 8] at m=1, k=16 with one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3766
// Runs the mlal_prfm_cortex_a53 kernel with m=1, n=8 for every k in [1, 15].
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3781
// Runs the mlal_prfm_cortex_a53 kernel with m=1, n=8 for every k in [1, 15], with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3797
// Sweeps k in [1, 15] and n in [1, 8] with one iteration per configuration.
// The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3817
// Runs the mlal_prfm_cortex_a53 kernel with m=1, n=8 for every k in [17, 31].
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3832
// Runs the mlal_prfm_cortex_a53 kernel with m=1, n=8 for every k in [17, 31], with a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(37)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3848
// Sweeps k in [17, 31] and n in [1, 8] with one iteration per configuration.
// The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3868
// Runs the mlal_prfm_cortex_a53 kernel with m=1, n=8 for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3883
// Runs the mlal_prfm_cortex_a53 kernel with m=1, n=8 for k = 32, 48, ..., 160, with a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(163)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
3899
// Sweeps k = 32, 48, ..., 160 and n in [1, 8] with one iteration per configuration.
// The m loop runs exactly once (m = 1), matching mr(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
3919
// Runs the 1x8c8 kernel with m = 1 for every n in [9, 15] and
// k = 1, 18, ..., 69 (step 17, bounded by 80).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3936
// Runs the 1x8c8 kernel with m = 1 for every n in [9, 15] and
// k = 1, 18, ..., 69 (step 17, bounded by 80), with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3954
// Runs the 1x8c8 kernel with m = 1 for every n in [9, 15] and
// k = 1, 18, ..., 69 (step 17, bounded by 80), with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3972
// Runs the 1x8c8 kernel for every n in [9, 15], k = 1, 18, ..., 69 (step 17,
// bounded by 80) and m = 1, with a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3992
// Runs the 1x8c8 kernel with m = 1 for n = 16 and 24 and
// k = 1, 18, ..., 69 (step 17, bounded by 80).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4009
// Runs the 1x8c8 kernel with m = 1 for n = 16 and 24 and
// k = 1, 18, ..., 69 (step 17, bounded by 80), with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4027
// Runs the 1x8c8 kernel with m = 1 for n = 16 and 24 and
// k = 1, 18, ..., 69 (step 17, bounded by 80), with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4045
// Runs the 1x8c8 kernel for n = 16 and 24, k = 1, 18, ..., 69 (step 17,
// bounded by 80) and m = 1, with a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4065
// Runs the 1x8c8 kernel for k = 1, 18, ..., 69 (step 17, bounded by 80),
// n in [1, 8] and m = 1, with cm_stride set to 11 and a single iteration
// per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4086
// Runs the 1x8c8 kernel once with m = 1, n = 8, k = 16 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4100
// Runs the 1x8c8 kernel once with m = 1, n = 8, k = 16 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4114
// Runs the 1x8c8 kernel once with m = 1, n = 8, k = 16 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(8).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4128 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4129
4130
4131 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Runs the 2x8c16 kernel once with m = 2, n = 8 and k = 16.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4144
// Runs the 2x8c16 kernel once with m = 2, n = 8, k = 16 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4158
// Runs the 2x8c16 kernel once with m = 2, n = 8, k = 16 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .a_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4172
// Runs the 2x8c16 kernel at k = 16 for every n in [1, 8] and m in [1, 2],
// with a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(m_val).n(n_val).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4190
// Runs the 2x8c16 kernel at n = 8, k = 16 for every m in [1, 2],
// with a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(m_val).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4206
// Runs the 2x8c16 kernel at m = 2, k = 16 for every n in [1, 8],
// with a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(n_val).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4222
// Runs the 2x8c16 kernel with m = 2, n = 8 for every k in [1, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4237
// Runs the 2x8c16 kernel with m = 2, n = 8 for every k in [1, 15],
// with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(k_val)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4253
// Runs the 2x8c16 kernel for every k in [1, 15], n in [1, 8] and m in [1, 2],
// with a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4273
// Runs the 2x8c16 kernel with m = 2, n = 8 for every k in [17, 31].
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4288
// Runs the 2x8c16 kernel with m = 2, n = 8 for every k in [17, 31],
// with a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(k_val)
      .a_stride(37)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4304
// Runs the 2x8c16 kernel for every k in [17, 31], n in [1, 8] and m in [1, 2],
// with a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4324
// Runs the 2x8c16 kernel with m = 2, n = 8 for k = 32, 48, ..., 160 (step 16).
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4339
// Runs the 2x8c16 kernel with m = 2, n = 8 for k = 32, 48, ..., 160 (step 16),
// with a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(k_val)
      .a_stride(163)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4355
// Runs the 2x8c16 kernel for k = 32, 48, ..., 160 (step 16), n in [1, 8] and
// m in [1, 2], with a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4375
// Runs the 2x8c16 kernel with m = 2 for every n in [9, 15] and
// k = 1, 18, ..., 69 (step 17, bounded by 80).
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4392
// Runs the 2x8c16 kernel with m = 2 for every n in [9, 15] and
// k = 1, 18, ..., 69 (step 17, bounded by 80), with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4410
// Runs the 2x8c16 kernel with m = 2 for every n in [9, 15] and
// k = 1, 18, ..., 69 (step 17, bounded by 80), with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(n_val).k(k_val)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4428
// Runs the 2x8c16 kernel for every n in [9, 15], k = 1, 18, ..., 69 (step 17,
// bounded by 80) and m in [1, 2], with a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4448
// Runs the 2x8c16 kernel with m = 2 for n = 16 and 24 and
// k = 1, 18, ..., 69 (step 17, bounded by 80).
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(n_val).k(k_val)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4465
// Runs the 2x8c16 kernel with m = 2 for n = 16 and 24 and
// k = 1, 18, ..., 69 (step 17, bounded by 80), with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4483
// Runs the 2x8c16 kernel with m = 2 for n = 16 and 24 and
// k = 1, 18, ..., 69 (step 17, bounded by 80), with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(n_val).k(k_val)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4501
// Runs the 2x8c16 kernel for n = 16 and 24, k = 1, 18, ..., 69 (step 17,
// bounded by 80) and m in [1, 2], with a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4521
// Runs the 2x8c16 kernel for k = 1, 18, ..., 69 (step 17, bounded by 80),
// n in [1, 8] and m in [1, 2], with cm_stride set to 11 and a single
// iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4542
// Runs the 2x8c16 kernel once with m = 2, n = 8, k = 16 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4556
// Runs the 2x8c16 kernel once with m = 2, n = 8, k = 16 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4570
// Runs the 2x8c16 kernel once with m = 2, n = 8, k = 16 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(16).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4584 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4585
4586
4587 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Runs the 4x16 kernel once with m = 4, n = 16 and k = 8.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4600
// Runs the 4x16 kernel once with m = 4, n = 16, k = 8 and cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4614
// Runs the 4x16 kernel once with m = 4, n = 16, k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4628
// Runs the 4x16 kernel at k = 8 for every n in [1, 16] and m in [1, 4],
// with a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4646
// Runs the 4x16 kernel at n = 16, k = 8 for every m in [1, 4],
// with a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(m_val).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4662
// Runs the 4x16 kernel at m = 4, k = 8 for every n in [1, 16],
// with a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(n_val).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4678
// Runs the 4x16 kernel with m = 4, n = 16 for every k in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4693
// Runs the 4x16 kernel with m = 4, n = 16 for every k in [1, 7],
// with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_val)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4709
// Runs the 4x16 kernel for every k in [1, 7], n in [1, 16] and m in [1, 4],
// with a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4729
// Runs the 4x16 kernel with m = 4, n = 16 for every k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4744
// Runs the 4x16 kernel with m = 4, n = 16 for every k in [9, 15],
// with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_val)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4760
// Runs the 4x16 kernel for every k in [9, 15], n in [1, 16] and m in [1, 4],
// with a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4780
// Runs the 4x16 kernel with m = 4, n = 16 for k = 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_val)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4795
// Runs the 4x16 kernel with m = 4, n = 16 for k = 16, 24, ..., 80 (step 8),
// with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_val)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4811
// Runs the 4x16 kernel for k = 16, 24, ..., 80 (step 8), n in [1, 16] and
// m in [1, 4], with a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4831
// Runs the 4x16 kernel with m = 4 for every n in [17, 31] and
// k = 1, 10, ..., 37 (step 9, bounded by 40).
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4848
// Runs the 4x16 kernel with m = 4 for every n in [17, 31] and
// k = 1, 10, ..., 37 (step 9, bounded by 40), with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4866
// Runs the 4x16 kernel with m = 4 for every n in [17, 31] and
// k = 1, 10, ..., 37 (step 9, bounded by 40), with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4884
// Runs the 4x16 kernel for every n in [17, 31], k = 1, 10, ..., 37 (step 9,
// bounded by 40) and m in [1, 4], with a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4904
// n_div_16: m=4, n in {32, 48} (multiples of the nr=16 setting),
// k over 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4921
// n_div_16_strided_cn: n in {32, 48}, k over 1, 10, 19, 28, 37, with
// cn_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4939
// n_div_16_strided_a: n in {32, 48}, k over 1, 10, 19, 28, 37, with
// a_stride(43) (larger than every k in the sweep).
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4957
// n_div_16_subtile: n in {32, 48}, k over 1, 10, 19, 28, 37, and every m in
// 1..4, each run with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4977
// strided_cm_subtile: k over 1, 10, 19, 28, 37, every n in 1..16 and every m
// in 1..4, with cm_stride(19) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4998
// qmin: single m=4, n=16, k=8 configuration with qmin(128) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5012
// qmax: single m=4, n=16, k=8 configuration with qmax(128) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5026
// strided_cm: single m=4, n=16, k=8 configuration with cm_stride(19) set.
TEST(QC8_GEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5040 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
5041
5042
5043 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_8: single m=1, n=8, k=8 configuration with no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5056
// strided_cn: single m=1, n=8, k=8 configuration with cn_stride(11) set.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5070
// k_eq_8_strided_a: single m=1, n=8, k=8 configuration with a_stride(11) set.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5084
// k_eq_8_subtile: k=8, every n in 1..8, m fixed at 1, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // mr is 1, so the m range 1..mr contains only the value 1.
  const uint32_t m_dim = 1;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m_dim).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5102
// k_eq_8_subtile_m: n=8, k=8, m fixed at 1, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // mr is 1, so the m range 1..mr contains only the value 1.
  const uint32_t m_dim = 1;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(m_dim).n(8).k(8)
    .iterations(1)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5118
// k_eq_8_subtile_n: m=1, k=8, every n in 1..8, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5134
// k_lt_8: m=1, n=8, every k in 1..7.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5149
// k_lt_8_strided_a: m=1, n=8, every k in 1..7, with a_stride(11) set.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5165
// k_lt_8_subtile: every k in 1..7 and every n in 1..8, m fixed at 1,
// with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // mr is 1, so the m range 1..mr contains only the value 1.
  const uint32_t m_dim = 1;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5185
// k_gt_8: m=1, n=8, every k in 9..15.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5200
// k_gt_8_strided_a: m=1, n=8, every k in 9..15, with a_stride(19) set.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5216
// k_gt_8_subtile: every k in 9..15 and every n in 1..8, m fixed at 1,
// with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // mr is 1, so the m range 1..mr contains only the value 1.
  const uint32_t m_dim = 1;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5236
// k_div_8: m=1, n=8, k over the multiples of 8 from 16 through 80.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5251
// k_div_8_strided_a: m=1, n=8, k over the multiples of 8 from 16 through 80,
// with a_stride(83) set.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5267
// k_div_8_subtile: k over the multiples of 8 from 16 through 80, every n in
// 1..8, m fixed at 1, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // mr is 1, so the m range 1..mr contains only the value 1.
  const uint32_t m_dim = 1;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5287
// n_gt_8: m=1, n swept over 9..15 (above the nr=8 setting),
// k over 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5304
// n_gt_8_strided_cn: n over 9..15, k over 1, 10, 19, 28, 37, with
// cn_stride(11) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5322
// n_gt_8_strided_a: n over 9..15, k over 1, 10, 19, 28, 37, with
// a_stride(43) (larger than every k in the sweep).
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5340
// n_gt_8_subtile: n over 9..15, k over 1, 10, 19, 28, 37, m fixed at 1,
// with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // mr is 1, so the m range 1..mr contains only the value 1.
  const uint32_t m_dim = 1;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5360
// n_div_8: m=1, n in {16, 24} (multiples of the nr=8 setting),
// k over 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5377
// n_div_8_strided_cn: n in {16, 24}, k over 1, 10, 19, 28, 37, with
// cn_stride(11) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5395
// n_div_8_strided_a: n in {16, 24}, k over 1, 10, 19, 28, 37, with
// a_stride(43) (larger than every k in the sweep).
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5413
// n_div_8_subtile: n in {16, 24}, k over 1, 10, 19, 28, 37, m fixed at 1,
// with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // mr is 1, so the m range 1..mr contains only the value 1.
  const uint32_t m_dim = 1;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5433
// strided_cm_subtile: k over 1, 10, 19, 28, 37, every n in 1..8, m fixed at
// 1, with cm_stride(11) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // mr is 1, so the m range 1..mr contains only the value 1.
  const uint32_t m_dim = 1;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .cm_stride(11)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5454
// qmin: single m=1, n=8, k=8 configuration with qmin(128) set.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5468
// qmax: single m=1, n=8, k=8 configuration with qmax(128) set.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5482
// strided_cm: single m=1, n=8, k=8 configuration with cm_stride(11) set.
TEST(QC8_GEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5496 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5497
5498
5499 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_16: single m=1, n=8, k=16 configuration with no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5512
// strided_cn: single m=1, n=8, k=16 configuration with cn_stride(11) set.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5526
// k_eq_16_strided_a: single m=1, n=8, k=16 configuration with a_stride(19)
// set.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .a_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5540
// k_eq_16_subtile: k=16, every n in 1..8, m fixed at 1, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // mr is 1, so the m range 1..mr contains only the value 1.
  const uint32_t m_dim = 1;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(m_dim).n(n_dim).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5558
// k_eq_16_subtile_m: n=8, k=16, m fixed at 1, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // mr is 1, so the m range 1..mr contains only the value 1.
  const uint32_t m_dim = 1;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(m_dim).n(8).k(16)
    .iterations(1)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5574
// k_eq_16_subtile_n: m=1, k=16, every n in 1..8, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(n_dim).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5590
// k_lt_16: m=1, n=8, every k in 1..15.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5605
// k_lt_16_strided_a: m=1, n=8, every k in 1..15, with a_stride(19) set.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5621
// k_lt_16_subtile: every k in 1..15 and every n in 1..8, m fixed at 1,
// with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // mr is 1, so the m range 1..mr contains only the value 1.
  const uint32_t m_dim = 1;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5641
// k_gt_16: m=1, n=8, every k in 17..31.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5656
// k_gt_16_strided_a: m=1, n=8, every k in 17..31, with a_stride(37) set.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(37)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5672
// k_gt_16_subtile: every k in 17..31 and every n in 1..8, m fixed at 1,
// with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // mr is 1, so the m range 1..mr contains only the value 1.
  const uint32_t m_dim = 1;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5692
// k_div_16: m=1, n=8, k over the multiples of 16 from 32 through 160.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5707
// k_div_16_strided_a: m=1, n=8, k over the multiples of 16 from 32 through
// 160, with a_stride(163) set.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(163)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5723
// k_div_16_subtile: k over the multiples of 16 from 32 through 160, every n
// in 1..8, m fixed at 1, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // mr is 1, so the m range 1..mr contains only the value 1.
  const uint32_t m_dim = 1;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5743
// n_gt_8: m=1, n swept over 9..15 (above the nr=8 setting),
// k over 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5760
// n_gt_8_strided_cn: n over 9..15, k over 1, 18, 35, 52, 69, with
// cn_stride(11) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5778
// n_gt_8_strided_a: n over 9..15, k over 1, 18, 35, 52, 69, with
// a_stride(83) (larger than every k in the sweep).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5796
// n_gt_8_subtile: n over 9..15, k over 1, 18, 35, 52, 69, m fixed at 1,
// with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // mr is 1, so the m range 1..mr contains only the value 1.
  const uint32_t m_dim = 1;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5816
// n_div_8: m=1, n in {16, 24} (multiples of the nr=8 setting),
// k over 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5833
// n_div_8_strided_cn: n in {16, 24}, k over 1, 18, 35, 52, 69, with
// cn_stride(11) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5851
// n_div_8_strided_a: n in {16, 24}, k over 1, 18, 35, 52, 69, with
// a_stride(83) (larger than every k in the sweep).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5869
// n_div_8_subtile: n in {16, 24}, k over 1, 18, 35, 52, 69, m fixed at 1,
// with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // mr is 1, so the m range 1..mr contains only the value 1.
  const uint32_t m_dim = 1;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5889
// strided_cm_subtile: k over 1, 18, 35, 52, 69, every n in 1..8, m fixed at
// 1, with cm_stride(11) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // mr is 1, so the m range 1..mr contains only the value 1.
  const uint32_t m_dim = 1;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(k_dim)
        .cm_stride(11)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5910
// qmin: single m=1, n=8, k=16 configuration with qmin(128) set.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5924
// qmax: single m=1, n=8, k=16 configuration with qmax(128) set.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5938
// Single run at m=1, n=8, k=16 with cm_stride(11) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5952 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5953
5954
5955 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at m=1, n=8, k=16; no stride is set explicitly.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5968
// Single run at m=1, n=8, k=16 with cn_stride(11) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5982
// Single run at m=1, n=8, k=16 with a_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .a_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5996
// With k fixed at 16, sweeps n over 1..8 (outer) and m over 1..1 (inner),
// calling the tester with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6014
// With n=8 and k=16 fixed, sweeps m over 1..1 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(m_dim).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6030
// With m=1 and k=16 fixed, sweeps n over 1..8 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(n_dim).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6046
// Sweeps k over 1..15 at m=1, n=8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6061
// Sweeps k over 1..15 at m=1, n=8 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6077
// Sweeps k over 1..15, n over 1..8 and m over 1..1 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6097
// Sweeps k over 17..31 at m=1, n=8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6112
// Sweeps k over 17..31 at m=1, n=8 with a_stride(37).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(37)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6128
// Sweeps k over 17..31, n over 1..8 and m over 1..1 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6148
// Sweeps k over 32..160 in steps of 16 at m=1, n=8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6163
// Sweeps k over 32..160 in steps of 16 at m=1, n=8 with a_stride(163).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(163)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6179
// Sweeps k over 32..160 in steps of 16, n over 1..8 and m over 1..1 with
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6199
// Sweeps n over 9..15 and k over 1..80 in steps of 17 at m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6216
// Sweeps n over 9..15 and k over 1..80 in steps of 17 at m=1 with
// cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6234
// Sweeps n over 9..15 and k over 1..80 in steps of 17 at m=1 with
// a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6252
// Sweeps n over 9..15, k over 1..80 in steps of 17 and m over 1..1 with
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6272
// Sweeps n over {16, 24} and k over 1..80 in steps of 17 at m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6289
// Sweeps n over {16, 24} and k over 1..80 in steps of 17 at m=1 with
// cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6307
// Sweeps n over {16, 24} and k over 1..80 in steps of 17 at m=1 with
// a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6325
// Sweeps n over {16, 24}, k over 1..80 in steps of 17 and m over 1..1 with
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6345
// Sweeps k over 1..80 in steps of 17, n over 1..8 and m over 1..1 with
// cm_stride(11) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6366
// Single run at m=1, n=8, k=16 with qmin(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6380
// Single run at m=1, n=8, k=16 with qmax(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6394
// Single run at m=1, n=8, k=16 with cm_stride(11) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6408 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6409
6410
6411 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at m=1, n=8, k=16; no stride is set explicitly.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6424
// Single run at m=1, n=8, k=16 with cn_stride(11) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6438
// Single run at m=1, n=8, k=16 with a_stride(19) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .a_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6452
// With k fixed at 16, sweeps n over 1..8 (outer) and m over 1..1 (inner),
// calling the tester with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6470
// With n=8 and k=16 fixed, sweeps m over 1..1 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(m_dim).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6486
// With m=1 and k=16 fixed, sweeps n over 1..8 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(n_dim).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6502
// Sweeps k over 1..15 at m=1, n=8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6517
// Sweeps k over 1..15 at m=1, n=8 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6533
// Sweeps k over 1..15, n over 1..8 and m over 1..1 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6553
// Sweeps k over 17..31 at m=1, n=8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6568
// Sweeps k over 17..31 at m=1, n=8 with a_stride(37).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(37)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6584
// Sweeps k over 17..31, n over 1..8 and m over 1..1 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6604
// Sweeps k over 32..160 in steps of 16 at m=1, n=8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6619
// Sweeps k over 32..160 in steps of 16 at m=1, n=8 with a_stride(163).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(163)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6635
// Sweeps k over 32..160 in steps of 16, n over 1..8 and m over 1..1 with
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6655
// Sweeps n over 9..15 and k over 1..80 in steps of 17 at m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6672
// Sweeps n over 9..15 and k over 1..80 in steps of 17 at m=1 with
// cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6690
// Sweeps n over 9..15 and k over 1..80 in steps of 17 at m=1 with
// a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6708
// Sweeps n over 9..15, k over 1..80 in steps of 17 and m over 1..1 with
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6728
// Sweeps n over {16, 24} and k over 1..80 in steps of 17 at m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6745
// Sweeps n over {16, 24} and k over 1..80 in steps of 17 at m=1 with
// cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6763
// Sweeps n over {16, 24} and k over 1..80 in steps of 17 at m=1 with
// a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6781
// Sweeps n over {16, 24}, k over 1..80 in steps of 17 and m over 1..1 with
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6801
// Sweeps k over 1..80 in steps of 17, n over 1..8 and m over 1..1 with
// cm_stride(11) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6822
// Single run at m=1, n=8, k=16 with qmin(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6836
// Single run at m=1, n=8, k=16 with qmax(128) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6850
// Single run at m=1, n=8, k=16 with cm_stride(11) set on the tester.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6864 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6865
6866
6867 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at m=1, n=8, k=16 with sr(4); no stride is set explicitly.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6880
// Single run at m=1, n=8, k=16 with sr(4) and cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6894
// Single run at m=1, n=8, k=16 with sr(4) and a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .a_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6908
// With k fixed at 16 and sr(4), sweeps n over 1..8 (outer) and m over 1..1
// (inner), calling the tester with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(m_dim).n(n_dim).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6926
// With n=8, k=16 and sr(4) fixed, sweeps m over 1..1 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(m_dim).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6942
// With m=1, k=16 and sr(4) fixed, sweeps n over 1..8 with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(n_dim).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6958
// Sweeps k over 1..15 at m=1, n=8 with sr(4).
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6973
// Sweeps k = 1..15 with m = 1, n = 8 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(8).k(k_val)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
6989
// Sweeps k = 1..15, n = 1..8 and m = 1 (nested in that order), iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7009
// Sweeps k = 17..31 with m = 1 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(8).k(k_val)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7024
// Sweeps k = 17..31 with m = 1, n = 8 and a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(8).k(k_val)
        .a_stride(37)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7040
// Sweeps k = 17..31, n = 1..8 and m = 1 (nested in that order), iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7060
// Sweeps k = 32..160 in steps of 16 with m = 1 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(8).k(k_val)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7075
// Sweeps k = 32..160 in steps of 16 with m = 1, n = 8 and a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(8).k(k_val)
        .a_stride(163)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7091
// Sweeps k = 32..160 (step 16), n = 1..8 and m = 1 (nested in that order), iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7111
// Sweeps n = 9..15 (outer) and k = 1..80 in steps of 17 (inner) with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7128
// Sweeps n = 9..15 (outer) and k = 1..80 in steps of 17 (inner) with m = 1 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7146
// Sweeps n = 9..15 (outer) and k = 1..80 in steps of 17 (inner) with m = 1 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .a_stride(83)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7164
// Sweeps n = 9..15, k = 1..80 (step 17) and m = 1 (nested in that order), iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7184
// Sweeps n = 16..24 in steps of 8 (outer) and k = 1..80 in steps of 17 (inner) with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7201
// Sweeps n = 16..24 (step 8, outer) and k = 1..80 (step 17, inner) with m = 1 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7219
// Sweeps n = 16..24 (step 8, outer) and k = 1..80 (step 17, inner) with m = 1 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(1).n(n_val).k(k_val)
          .a_stride(83)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7237
// Sweeps n = 16..24 (step 8), k = 1..80 (step 17) and m = 1 (nested in that order), iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7257
// Sweeps k = 1..80 (step 17), n = 1..8 and m = 1 (nested in that order) with cm_stride set to 11
// and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7278
// Single run with m = 1, n = 8, k = 16 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7292
// Single run with m = 1, n = 8, k = 16 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7306
// Single run with m = 1, n = 8, k = 16 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7320 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7321
7322
7323 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Single run with m = 1, n = 8 and k = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7336
// Single run with m = 1, n = 8, k = 8 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7350
// Single run with m = 1, n = 8, k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7364
// k fixed at 8; sweeps n = 1..8 (outer) and m = 1 (inner), iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7382
// k fixed at 8 and n fixed at 8; sweeps m = 1, iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_val).n(8).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7398
// k fixed at 8 and m fixed at 1; sweeps n = 1..8, iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7414
// Sweeps k = 1..7 with m = 1 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7429
// Sweeps k = 1..7 with m = 1, n = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7445
// Sweeps k = 1..7, n = 1..8 and m = 1 (nested in that order), iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7465
// Sweeps k = 9..15 with m = 1 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7480
// Sweeps k = 9..15 with m = 1, n = 8 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7496
// Sweeps k = 9..15, n = 1..8 and m = 1 (nested in that order), iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7516
// Sweeps k = 16..80 in steps of 8 with m = 1 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7531
// Sweeps k = 16..80 in steps of 8 with m = 1, n = 8 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7547
// Sweeps k = 16..80 (step 8), n = 1..8 and m = 1 (nested in that order), iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7567
// Sweeps n = 9..15 (outer) and k = 1..40 in steps of 9 (inner) with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7584
// Sweeps n = 9..15 (outer) and k = 1..40 in steps of 9 (inner) with m = 1 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7602
// Sweeps n = 9..15 (outer) and k = 1..40 in steps of 9 (inner) with m = 1 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_val).k(k_val)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7620
// Sweeps n = 9..15, k = 1..40 (step 9) and m = 1 (nested in that order), iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7640
// Sweeps n = 16..24 in steps of 8 (outer) and k = 1..40 in steps of 9 (inner) with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7657
// Sweeps n = 16..24 (step 8, outer) and k = 1..40 (step 9, inner) with m = 1 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7675
// Sweeps n = 16..24 (step 8, outer) and k = 1..40 (step 9, inner) with m = 1 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_val).k(k_val)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7693
// Sweeps n = 16..24 (step 8), k = 1..40 (step 9) and m = 1 (nested in that order), iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7713
// Sweeps k = 1..40 (step 9), n = 1..8 and m = 1 (nested in that order) with cm_stride set to 11
// and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7734
// Single run with m = 1, n = 8, k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7748
// Single run with m = 1, n = 8, k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7762
// Single run with m = 1, n = 8, k = 8 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7776 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
7777
7778
7779 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 1, n = 8 and k = 16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7792
// Single run with m = 1, n = 8, k = 16 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7806
// Single run with m = 1, n = 8, k = 16 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7820
// k fixed at 16; sweeps n = 1..8 (outer) and m = 1 (inner), iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_val).n(n_val).k(16)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7838
// k fixed at 16 and n fixed at 8; sweeps m = 1, iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_val).n(8).k(16)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7854
// k fixed at 16 and m fixed at 1; sweeps n = 1..8, iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_val).k(16)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7870
// Sweeps k = 1..15 with m = 1 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7885
// Sweeps k = 1..15 with m = 1, n = 8 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7901
// Sweeps k = 1..15, n = 1..8 and m = 1 (nested in that order), iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7921
// Sweeps k = 17..31 with m = 1 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7936
// Sweeps k = 17..31 with m = 1, n = 8 and a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val)
        .a_stride(37)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7952
// Sweeps k = 17..31, n = 1..8 and m = 1 (nested in that order), iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7972
// Sweeps k = 32..160 in steps of 16 with m = 1 and n = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
7987
// Sweeps k = 32..160 in steps of 16 with m = 1, n = 8 and a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_val)
        .a_stride(163)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
8003
// Sweeps k = 32..160 (step 16), n = 1..8 and m = 1 (nested in that order), iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
8023
// Sweeps n = 9..15 (outer) and k = 1..80 in steps of 17 (inner) with m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_val).k(k_val)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
8040
// n from 9 through 15 with cn_stride set to 11; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8058
// n from 9 through 15 with a_stride set to 83; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8076
// n from 9 through 15, k in {1, 18, 35, 52, 69}, m = 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8096
// n in {16, 24} (multiples of nr = 8) with m = 1; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8113
// n in {16, 24} with cn_stride set to 11; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8131
// n in {16, 24} with a_stride set to 83; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8149
// n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8169
// cm_stride set to 11 across k in {1, 18, 35, 52, 69}, n in [1, 8] and m = 1;
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8190
// Full 1x8 tile, k = 16, with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8204
// Full 1x8 tile, k = 16, with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8218
// Full 1x8 tile, k = 16, with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8232 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8233
8234
8235 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 1x8 tile (m = 1, n = 8) with k = 16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8248
// Full 1x8 tile, k = 16, with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8262
// Full 1x8 tile, k = 16, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .a_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8276
// k = 16 for every n in [1, 8] with m = 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m).n(n).k(16)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8294
// k = 16, n = 8, looping m over [1, 1]; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(m).n(8).k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8310
// k = 16, m = 1, looping n over [1, 8]; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(n).k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8326
// Full 1x8 tile with k from 1 through 15.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8341
// Full 1x8 tile with k from 1 through 15 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8357
// k from 1 through 15, n in [1, 8], m = 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8377
// Full 1x8 tile with k from 17 through 31.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8392
// Full 1x8 tile with k from 17 through 31 and a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k)
      .a_stride(37)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8408
// k from 17 through 31, n in [1, 8], m = 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8428
// Full 1x8 tile with k stepping through multiples of 16 from 32 to 160.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8443
// Full 1x8 tile with k in multiples of 16 from 32 to 160 and a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k)
      .a_stride(163)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8459
// k in multiples of 16 from 32 to 160, n in [1, 8], m = 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8479
// n from 9 through 15 (above nr = 8) with m = 1; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8496
// n from 9 through 15 with cn_stride set to 11; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8514
// n from 9 through 15 with a_stride set to 83; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8532
// n from 9 through 15, k in {1, 18, 35, 52, 69}, m = 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8552
// n in {16, 24} (multiples of nr = 8) with m = 1; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8569
// n in {16, 24} with cn_stride set to 11; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8587
// n in {16, 24} with a_stride set to 83; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8605
// n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8625
// cm_stride set to 11 across k in {1, 18, 35, 52, 69}, n in [1, 8] and m = 1;
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8646
// Full 1x8 tile, k = 16, with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8660
// Full 1x8 tile, k = 16, with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8674
// Full 1x8 tile, k = 16, with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8688 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8689
8690
8691 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 1x8 tile (m = 1, n = 8) with k = 16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8704
// Full 1x8 tile, k = 16, with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8718
// Full 1x8 tile, k = 16, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .a_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8732
// k = 16 for every n in [1, 8] with m = 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m).n(n).k(16)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8750
// k = 16, n = 8, looping m over [1, 1]; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(m).n(8).k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8766
// k = 16, m = 1, looping n over [1, 8]; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(n).k(16)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8782
// Full 1x8 tile with k from 1 through 15.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8797
// Full 1x8 tile with k from 1 through 15 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8813
// k from 1 through 15, n in [1, 8], m = 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8833
// Full 1x8 tile with k from 17 through 31.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8848
// Full 1x8 tile with k from 17 through 31 and a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k)
      .a_stride(37)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8864
// k from 17 through 31, n in [1, 8], m = 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8884
// Full 1x8 tile with k stepping through multiples of 16 from 32 to 160.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8899
// Full 1x8 tile with k in multiples of 16 from 32 to 160 and a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k)
      .a_stride(163)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8915
// k in multiples of 16 from 32 to 160, n in [1, 8], m = 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8935
// n from 9 through 15 (above nr = 8) with m = 1; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8952
// n from 9 through 15 with cn_stride set to 11; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8970
// n from 9 through 15 with a_stride set to 83; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8988
// n from 9 through 15, k in {1, 18, 35, 52, 69}, m = 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9008
// n in {16, 24} (multiples of nr = 8) with m = 1; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9025
// n in {16, 24} with cn_stride set to 11; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9043
// n in {16, 24} with a_stride set to 83; k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9061
// n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 1; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9081
// Sweeps k = 1..80 step 17, n = 1..8 and m in [1, 1] with cm_stride set to
// 11, running a single iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9102
// Single 1x8 run at k = 16 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(1);
  tester.m(1).n(8).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9116
// Single 1x8 run at k = 16 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(1);
  tester.m(1).n(8).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9130
// Single 1x8 run at k = 16 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(1);
  tester.m(1).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9144 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9145
9146
9147 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 1x8 run of the 1x8c4s2 NEON MLAL kernel at k = 16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9160
// Single 1x8 run at k = 16 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9174
// Single 1x8 run at k = 16 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.a_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9188
// Sweeps n = 1..8 and m in [1, 1] at k = 16, one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(m_size).n(n_size).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9206
// Sweeps m in [1, 1] with n = 8 and k = 16, one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(m_size).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9222
// Sweeps n = 1..8 with m = 1 and k = 16, one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(n_size).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9238
// Runs a 1x8 problem for every k in 1..15.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9253
// Runs a 1x8 problem for every k in 1..15 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9269
// Sweeps k = 1..15, n = 1..8 and m in [1, 1], one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9289
// Runs a 1x8 problem for every k in 17..31.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9304
// Runs a 1x8 problem for every k in 17..31 with a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.a_stride(37);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9320
// Sweeps k = 17..31, n = 1..8 and m in [1, 1], one iteration per
// configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9340
// Runs a 1x8 problem for k = 32, 48, ..., 160 (step 16).
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9355
// Runs a 1x8 problem for k = 32, 48, ..., 160 with a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.a_stride(163);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
9371
// Sweeps k = 32..160 step 16, n = 1..8 and m in [1, 1], one iteration per
// configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9391
// Sweeps n = 9..15, each with k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9408
// Sweeps n = 9..15 and k = 1..80 step 17 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9426
// Sweeps n = 9..15 and k = 1..80 step 17 with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9444
// Sweeps n = 9..15, k = 1..80 step 17 and m in [1, 1], one iteration per
// configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9464
// Sweeps n = 16 and 24, each with k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9481
// Sweeps n = 16, 24 and k = 1..80 step 17 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9499
// Sweeps n = 16, 24 and k = 1..80 step 17 with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9517
// Sweeps n = 16, 24, k = 1..80 step 17 and m in [1, 1], one iteration per
// configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9537
// Sweeps k = 1..80 step 17, n = 1..8 and m in [1, 1] with cm_stride set to
// 11, one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9558
// Single 1x8 run at k = 16 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9572
// Single 1x8 run at k = 16 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9586
// Single 1x8 run at k = 16 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
9600 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9601
9602
9603 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 1x8 run of the 1x8c4s2 NEONv8 MLAL kernel at k = 16.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9616
// Single 1x8 run at k = 16 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9630
// Single 1x8 run at k = 16 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.a_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
9644
// Sweeps n = 1..8 and m in [1, 1] at k = 16, one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(m_size).n(n_size).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9662
// Sweeps m in [1, 1] with n = 8 and k = 16, one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(m_size).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9678
// Sweeps n = 1..8 with m = 1 and k = 16, one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(n_size).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9694
// Runs a 1x8 problem for every k in 1..15.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9709
// Runs a 1x8 problem for every k in 1..15 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9725
// Sweeps k = 1..15, n = 1..8 and m in [1, 1], one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9745
// Runs a 1x8 problem for every k in 17..31.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9760
// Runs a 1x8 problem for every k in 17..31 with a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.a_stride(37);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9776
// Sweeps k = 17..31, n = 1..8 and m in [1, 1], one iteration per
// configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9796
// Runs a 1x8 problem for k = 32, 48, ..., 160 (step 16).
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9811
// Runs a 1x8 problem for k = 32, 48, ..., 160 with a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(4).sr(2);
    tester.m(1).n(8).k(k_size);
    tester.a_stride(163);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
9827
// Sweeps k = 32..160 step 16, n = 1..8 and m in [1, 1], one iteration per
// configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9847
// Sweeps n = 9..15, each with k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9864
// Sweeps n = 9..15 and k = 1..80 step 17 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9882
// Sweeps n = 9..15 and k = 1..80 step 17 with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9900
// Sweeps n = 9..15, k = 1..80 step 17 and m in [1, 1], one iteration per
// configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9920
// Sweeps n = 16 and 24, each with k = 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9937
// Sweeps n = 16, 24 and k = 1..80 step 17 with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9955
// Sweeps n = 16, 24 and k = 1..80 step 17 with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(4).sr(2);
      tester.m(1).n(n_size).k(k_size);
      tester.a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
9973
// Sweeps n = 16, 24, k = 1..80 step 17 and m in [1, 1], one iteration per
// configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
9993
// Sweeps k = 1..80 step 17, n = 1..8 and m in [1, 1] with cm_stride set to
// 11, one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(4).sr(2);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10014
// Single 1x8 run at k = 16 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
10028
// Single 1x8 run at k = 16 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
10042
// Single 1x8 run at k = 16 with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(4).sr(2);
  tester.m(1).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
10056 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10057
10058
10059 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 1x16 run of the 1x16 NEON MLAL-lane kernel at k = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1);
  tester.m(1).n(16).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
10072
// Single 1x16 run at k = 8 with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1);
  tester.m(1).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
10086
// Single 1x16 run at k = 8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(16).kr(1).sr(1);
  tester.m(1).n(16).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
10100
// Sweeps n = 1..16 and m in [1, 1] at k = 8, one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(16).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10118
// Sweeps m in [1, 1] with n = 16 and k = 8, one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(16).kr(1).sr(1);
    tester.m(m_size).n(16).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
10134
// Sweeps n over [1, 16] at m = 1, k = 8 with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10150
// Sweeps k over [1, 7] on the full 1x16 tile (m = 1, n = 16).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10165
// Sweeps k over [1, 7] on the full 1x16 tile with an explicit a_stride of 11.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10181
// Sweeps k over [1, 7], n over [1, 16] and m over [1, 1], with iterations set
// to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10201
// Sweeps k over [9, 15] on the full 1x16 tile (m = 1, n = 16).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10216
// Sweeps k over [9, 15] on the full 1x16 tile with an explicit a_stride of 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10232
// Sweeps k over [9, 15], n over [1, 16] and m over [1, 1], with iterations set
// to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10252
// Sweeps k over the multiples of 8 from 16 to 80 on the full 1x16 tile.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10267
// Sweeps k over the multiples of 8 from 16 to 80 on the full 1x16 tile, with
// an explicit a_stride of 83.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10283
// Sweeps k over the multiples of 8 from 16 to 80, n over [1, 16] and m over
// [1, 1], with iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10303
// Sweeps n over [17, 31] and k over {1, 10, 19, 28, 37} at m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10320
// Sweeps n over [17, 31] and k over {1, 10, 19, 28, 37} at m = 1, with an
// explicit cn_stride of 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10338
// Sweeps n over [17, 31] and k over {1, 10, 19, 28, 37} at m = 1, with an
// explicit a_stride of 43.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10356
// Sweeps n over [17, 31], k over {1, 10, 19, 28, 37} and m over [1, 1], with
// iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10376
// Sweeps n over {32, 48} and k over {1, 10, 19, 28, 37} at m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10393
// Sweeps n over {32, 48} and k over {1, 10, 19, 28, 37} at m = 1, with an
// explicit cn_stride of 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10411
// Sweeps n over {32, 48} and k over {1, 10, 19, 28, 37} at m = 1, with an
// explicit a_stride of 43.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10429
// Sweeps n over {32, 48}, k over {1, 10, 19, 28, 37} and m over [1, 1], with
// iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10449
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 16] and m over [1, 1], with an
// explicit cm_stride of 19 and iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10470
// Full 1x16 tile with k = 8 and the tester's qmin set to 128.
// NOTE(review): qmin presumably tightens the lower output clamp; confirm in
// gemm-microkernel-tester.h.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10484
// Full 1x16 tile with k = 8 and the tester's qmax set to 128.
// NOTE(review): qmax presumably tightens the upper output clamp; confirm in
// gemm-microkernel-tester.h.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10498
// Full 1x16 tile with k = 8 and an explicit cm_stride of 19.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10512 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10513
10514
10515 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the PRFM kernel variant on a full 1x16 tile (m = 1, n = 16) with k = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10528
// Full 1x16 tile with k = 8 and an explicit cn_stride of 19 (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10542
// Full 1x16 tile with k = 8 and an explicit a_stride of 11 (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10556
// Sweeps n over [1, 16] and m over [1, 1] at k = 8, with iterations set to 1
// for every (m, n) combination (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10574
// Sweeps m over [1, 1] at n = 16, k = 8 with iterations set to 1 (PRFM
// variant). Because the upper bound equals 1, the loop body runs exactly once.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10590
// Sweeps n over [1, 16] at m = 1, k = 8 with iterations set to 1 (PRFM
// variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10606
// Sweeps k over [1, 7] on the full 1x16 tile (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10621
// Sweeps k over [1, 7] on the full 1x16 tile with an explicit a_stride of 11
// (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10637
// Sweeps k over [1, 7], n over [1, 16] and m over [1, 1], with iterations set
// to 1 for every combination (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10657
// Sweeps k over [9, 15] on the full 1x16 tile (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10672
// Sweeps k over [9, 15] on the full 1x16 tile with an explicit a_stride of 19
// (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10688
// Sweeps k over [9, 15], n over [1, 16] and m over [1, 1], with iterations set
// to 1 for every combination (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10708
// Sweeps k over the multiples of 8 from 16 to 80 on the full 1x16 tile (PRFM
// variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10723
// Sweeps k over the multiples of 8 from 16 to 80 on the full 1x16 tile, with
// an explicit a_stride of 83 (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10739
// Sweeps k over the multiples of 8 from 16 to 80, n over [1, 16] and m over
// [1, 1], with iterations set to 1 for every combination (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10759
// Sweeps n over [17, 31] and k over {1, 10, 19, 28, 37} at m = 1 (PRFM
// variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10776
// Sweeps n over [17, 31] and k over {1, 10, 19, 28, 37} at m = 1, with an
// explicit cn_stride of 19 (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10794
// Sweeps n over [17, 31] and k over {1, 10, 19, 28, 37} at m = 1, with an
// explicit a_stride of 43 (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10812
// Sweeps n over [17, 31], k over {1, 10, 19, 28, 37} and m over [1, 1], with
// iterations set to 1 for every combination (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10832
// Sweeps n over {32, 48} and k over {1, 10, 19, 28, 37} at m = 1 (PRFM
// variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10849
// Sweeps n over {32, 48} and k over {1, 10, 19, 28, 37} at m = 1, with an
// explicit cn_stride of 19 (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10867
// Sweeps n over {32, 48} and k over {1, 10, 19, 28, 37} at m = 1, with an
// explicit a_stride of 43 (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10885
// Sweeps n over {32, 48}, k over {1, 10, 19, 28, 37} and m over [1, 1], with
// iterations set to 1 for every combination (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10905
// Sweeps k over {1, 10, 19, 28, 37}, n over [1, 16] and m over [1, 1], with an
// explicit cm_stride of 19 and iterations set to 1 for every combination
// (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10926
// Full 1x16 tile with k = 8 and the tester's qmin set to 128 (PRFM variant).
// NOTE(review): qmin presumably tightens the lower output clamp; confirm in
// gemm-microkernel-tester.h.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10940
// Full 1x16 tile with k = 8 and the tester's qmax set to 128 (PRFM variant).
// NOTE(review): qmax presumably tightens the upper output clamp; confirm in
// gemm-microkernel-tester.h.
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10954
// Full 1x16 tile with k = 8 and an explicit cm_stride of 19 (PRFM variant).
TEST(QC8_GEMM_MINMAX_FP32_1X16__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(1)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10968 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10969
10970
10971 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the kernel on a full 2x8 tile (m = 2, n = 8) with k = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10984
// Full 2x8 tile with k = 8 and an explicit cn_stride of 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10998
// Full 2x8 tile with k = 8 and an explicit a_stride of 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
11012
// Sweeps n over [1, 8] and m over [1, 2] at k = 8, with iterations set to 1
// for every (m, n) combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11030
// Sweeps m over [1, 2] at n = 8, k = 8 with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11046
// Sweeps n over [1, 8] at m = 2, k = 8 with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11062
// Sweeps k over [1, 7] on the full 2x8 tile (m = 2, n = 8).
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11077
// Sweeps k over [1, 7] on the full 2x8 tile with an explicit a_stride of 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11093
// Sweeps k over [1, 7], n over [1, 8] and m over [1, 2], with iterations set
// to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11113
// Sweeps k over [9, 15] on the full 2x8 tile (m = 2, n = 8).
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11128
// Sweeps k over [9, 15] on the full 2x8 tile with an explicit a_stride of 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11144
// Sweeps k over [9, 15], n over [1, 8] and m over [1, 2] on the 2x8
// neon_mlal_lane kernel, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11164
// Runs the 2x8 neon_mlal_lane kernel with m = mr and n = nr for k = 16, 24,
// ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11179
// Runs the 2x8 neon_mlal_lane kernel with m = mr and n = nr for k = 16, 24,
// ..., 80 (step 8), with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth);
    tester.a_stride(83);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11195
// Sweeps k = 16, 24, ..., 80, n over [1, 8] and m over [1, 2] on the 2x8
// neon_mlal_lane kernel, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11215
// Sweeps n over [9, 15] and k = 1, 10, ..., 37 (step 9) with m = mr on the
// 2x8 neon_mlal_lane kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11232
// Sweeps n over [9, 15] and k = 1, 10, ..., 37 (step 9) with m = mr and
// cn_stride set to 11 on the 2x8 neon_mlal_lane kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11250
// Sweeps n over [9, 15] and k = 1, 10, ..., 37 (step 9) with m = mr and
// a_stride set to 43 on the 2x8 neon_mlal_lane kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.a_stride(43);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11268
// Sweeps n over [9, 15], k = 1, 10, ..., 37 (step 9) and m over [1, 2] on the
// 2x8 neon_mlal_lane kernel, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11288
// Sweeps n = 16, 24 and k = 1, 10, ..., 37 (step 9) with m = mr on the 2x8
// neon_mlal_lane kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11305
// Sweeps n = 16, 24 and k = 1, 10, ..., 37 (step 9) with m = mr and cn_stride
// set to 11 on the 2x8 neon_mlal_lane kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11323
// Sweeps n = 16, 24 and k = 1, 10, ..., 37 (step 9) with m = mr and a_stride
// set to 43 on the 2x8 neon_mlal_lane kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.a_stride(43);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11341
// Sweeps n = 16, 24, k = 1, 10, ..., 37 (step 9) and m over [1, 2] on the 2x8
// neon_mlal_lane kernel, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11361
// Sweeps k = 1, 10, ..., 37 (step 9), n over [1, 8] and m over [1, 2] with
// cm_stride set to 11 on the 2x8 neon_mlal_lane kernel, one tester iteration
// per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11382
// Single run of the 2x8 neon_mlal_lane kernel with m = mr, n = nr, k = 8 and
// the tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8);
  tester.qmin(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
11396
// Single run of the 2x8 neon_mlal_lane kernel with m = mr, n = nr, k = 8 and
// the tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8);
  tester.qmax(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
11410
// Single run of the 2x8 neon_mlal_lane kernel with m = mr, n = nr, k = 8 and
// cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
11424 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11425
11426
11427 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 2x8 neon_mlal_lane_prfm kernel with m = mr, n = nr and
// k = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
11440
// Single run of the 2x8 neon_mlal_lane_prfm kernel with m = mr, n = nr,
// k = 8 and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
11454
// Single run of the 2x8 neon_mlal_lane_prfm kernel with m = mr, n = nr,
// k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8);
  tester.a_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
11468
// Sweeps n over [1, 8] and m over [1, 2] at k = 8 on the 2x8
// neon_mlal_lane_prfm kernel, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11486
// Sweeps m over [1, 2] with n = nr and k = 8 on the 2x8 neon_mlal_lane_prfm
// kernel, one tester iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(rows).n(8).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11502
// Sweeps n over [1, 8] with m = mr and k = 8 on the 2x8 neon_mlal_lane_prfm
// kernel, one tester iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(cols).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11518
// Runs the 2x8 neon_mlal_lane_prfm kernel with m = mr and n = nr for every k
// in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11533
// Runs the 2x8 neon_mlal_lane_prfm kernel with m = mr and n = nr for every k
// in [1, 7], with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth);
    tester.a_stride(11);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11549
// Sweeps k over [1, 7], n over [1, 8] and m over [1, 2] on the 2x8
// neon_mlal_lane_prfm kernel, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11569
// Runs the 2x8 neon_mlal_lane_prfm kernel with m = mr and n = nr for every k
// in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11584
// Runs the 2x8 neon_mlal_lane_prfm kernel with m = mr and n = nr for every k
// in [9, 15], with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth);
    tester.a_stride(19);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11600
// Sweeps k over [9, 15], n over [1, 8] and m over [1, 2] on the 2x8
// neon_mlal_lane_prfm kernel, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11620
// Runs the 2x8 neon_mlal_lane_prfm kernel with m = mr and n = nr for k = 16,
// 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11635
// Runs the 2x8 neon_mlal_lane_prfm kernel with m = mr and n = nr for k = 16,
// 24, ..., 80 (step 8), with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth);
    tester.a_stride(83);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
11651
// Sweeps k = 16, 24, ..., 80, n over [1, 8] and m over [1, 2] on the 2x8
// neon_mlal_lane_prfm kernel, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11671
// Sweeps n over [9, 15] and k = 1, 10, ..., 37 (step 9) with m = mr on the
// 2x8 neon_mlal_lane_prfm kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11688
// Sweeps n over [9, 15] and k = 1, 10, ..., 37 (step 9) with m = mr and
// cn_stride set to 11 on the 2x8 neon_mlal_lane_prfm kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11706
// Sweeps n over [9, 15] and k = 1, 10, ..., 37 (step 9) with m = mr and
// a_stride set to 43 on the 2x8 neon_mlal_lane_prfm kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.a_stride(43);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11724
// Sweeps n over [9, 15], k = 1, 10, ..., 37 (step 9) and m over [1, 2] on the
// 2x8 neon_mlal_lane_prfm kernel, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11744
// Sweeps n = 16, 24 and k = 1, 10, ..., 37 (step 9) with m = mr on the 2x8
// neon_mlal_lane_prfm kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11761
// Sweeps n = 16, 24 and k = 1, 10, ..., 37 (step 9) with m = mr and cn_stride
// set to 11 on the 2x8 neon_mlal_lane_prfm kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11779
// Sweeps n = 16, 24 and k = 1, 10, ..., 37 (step 9) with m = mr and a_stride
// set to 43 on the 2x8 neon_mlal_lane_prfm kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.a_stride(43);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
11797
// Sweeps n = 16, 24, k = 1, 10, ..., 37 (step 9) and m over [1, 2] on the 2x8
// neon_mlal_lane_prfm kernel, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11817
// Sweeps k = 1, 10, ..., 37 (step 9), n over [1, 8] and m over [1, 2] with
// cm_stride set to 11 on the 2x8 neon_mlal_lane_prfm kernel, one tester
// iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11838
// Single run of the 2x8 neon_mlal_lane_prfm kernel with m = mr, n = nr,
// k = 8 and the tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8);
  tester.qmin(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
11852
// Single run of the 2x8 neon_mlal_lane_prfm kernel with m = mr, n = nr,
// k = 8 and the tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8);
  tester.qmax(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
11866
// Single run of the 2x8 neon_mlal_lane_prfm kernel with m = mr, n = nr,
// k = 8 and cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
11880 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11881
11882
11883 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 2x8 neonv8_mlal_lane kernel with m = mr, n = nr and
// k = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
11896
// Single run of the 2x8 neonv8_mlal_lane kernel with m = mr, n = nr, k = 8
// and cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
11910
// Single run of the 2x8 neonv8_mlal_lane kernel with m = mr, n = nr, k = 8
// and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8);
  tester.a_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
11924
// Sweeps n over [1, 8] and m over [1, 2] at k = 8 on the 2x8
// neonv8_mlal_lane kernel, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
11942
// Sweeps m over [1, 2] with n = nr and k = 8 on the 2x8 neonv8_mlal_lane
// kernel, one tester iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(rows).n(8).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
11958
// Sweeps n over [1, 8] with m = mr and k = 8 on the 2x8 neonv8_mlal_lane
// kernel, one tester iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(cols).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
11974
// Runs the 2x8 neonv8_mlal_lane kernel with m = mr and n = nr for every k in
// [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
11989
// Runs the 2x8 neonv8_mlal_lane kernel with m = mr and n = nr for every k in
// [1, 7], with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth);
    tester.a_stride(11);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12005
// Sweeps k over [1, 7], n over [1, 8] and m over [1, 2] on the 2x8
// neonv8_mlal_lane kernel, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12025
// Runs the 2x8 neonv8_mlal_lane kernel with m = mr and n = nr for every k in
// [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12040
// Runs the 2x8 neonv8_mlal_lane kernel with m = mr and n = nr for every k in
// [9, 15], with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth);
    tester.a_stride(19);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12056
// Sweeps k over [9, 15], n over [1, 8] and m over [1, 2] on the 2x8
// neonv8_mlal_lane kernel, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12076
// Runs the 2x8 neonv8_mlal_lane kernel with m = mr and n = nr for k = 16, 24,
// ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12091
// Runs the 2x8 neonv8_mlal_lane kernel with m = mr and n = nr for k = 16, 24,
// ..., 80 (step 8), with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth);
    tester.a_stride(83);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12107
// Sweeps k = 16, 24, ..., 80, n over [1, 8] and m over [1, 2] on the 2x8
// neonv8_mlal_lane kernel, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12127
// Sweeps n over [9, 15] and k = 1, 10, ..., 37 (step 9) with m = mr on the
// 2x8 neonv8_mlal_lane kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
12144
// Sweeps n over [9, 15] and k = 1, 10, ..., 37 (step 9) with m = mr and
// cn_stride set to 11 on the 2x8 neonv8_mlal_lane kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols <= 15; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
12162
// n_gt_8_strided_a: n runs over 9..15 and k over 1, 10, 19, 28, 37 with m == 2 and
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12180
// n_gt_8_subtile: n runs over 9..15, k over 1, 10, 19, 28, 37, and m over 1..2, with
// iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12200
// n_div_8: n takes the values 16 and 24 (multiples of nr == 8) with m == 2; for each n,
// k takes the values 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12217
// n_div_8_strided_cn: n in {16, 24} and k in {1, 10, 19, 28, 37} with m == 2 and
// cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12235
// n_div_8_strided_a: n in {16, 24} and k in {1, 10, 19, 28, 37} with m == 2 and
// a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12253
// n_div_8_subtile: n in {16, 24}, k in {1, 10, 19, 28, 37}, and m in 1..2, with
// iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12273
// strided_cm_subtile: k in {1, 10, 19, 28, 37}, n in 1..8, and m in 1..2, each run with
// cm_stride set to 11 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(11).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12294
// qmin: single full-tile run (m == 2, n == 8, k == 8) with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12308
// qmax: single full-tile run (m == 2, n == 8, k == 8) with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12322
// strided_cm: single full-tile run (m == 2, n == 8, k == 8) with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12336 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12337
12338
12339 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_16: single full-tile run (m == 2, n == 8) with k == 16 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
12352
// strided_cn: single full-tile run (m == 2, n == 8, k == 16) with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
12366
// k_eq_16_strided_a: single full-tile run (m == 2, n == 8, k == 16) with a_stride set
// to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.a_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
12380
// k_eq_16_subtile: with k fixed at 16, every (n, m) with n in 1..8 and m in 1..2 is
// tested, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12398
// k_eq_16_subtile_m: with n == 8 and k == 16 fixed, m runs over 1..2; iterations is set
// to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(m_size).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
12414
// k_eq_16_subtile_n: with m == 2 and k == 16 fixed, n runs over 1..8; iterations is set
// to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(n_size).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
12430
// k_lt_16: full 2x8 tile for every k in 1..15.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
12445
// k_lt_16_strided_a: full 2x8 tile for every k in 1..15, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k_size);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
12461
// k_lt_16_subtile: k in 1..15, n in 1..8, and m in 1..2, with iterations set to 1 for
// every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12481
// k_gt_16: full 2x8 tile for every k in 17..31.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
12496
// k_gt_16_strided_a: full 2x8 tile for every k in 17..31, with a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k_size);
    tester.a_stride(37);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
12512
// k_gt_16_subtile: k in 17..31, n in 1..8, and m in 1..2, with iterations set to 1 for
// every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12532
// k_div_16: full 2x8 tile for each k in 32, 48, ..., 160.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
12547
// k_div_16_strided_a: full 2x8 tile for each k in 32, 48, ..., 160, with a_stride set
// to 163.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k_size);
    tester.a_stride(163);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
12563
// k_div_16_subtile: k in 32, 48, ..., 160, n in 1..8, and m in 1..2, with iterations
// set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12583
// n_gt_8: n runs over 9..15 (above nr == 8) with m == 2; for each n, k takes the
// values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12600
// n_gt_8_strided_cn: n in 9..15 and k in {1, 18, 35, 52, 69} with m == 2 and cn_stride
// set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12618
// n_gt_8_strided_a: n in 9..15 and k in {1, 18, 35, 52, 69} with m == 2 and a_stride
// set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12636
// n_gt_8_subtile: n in 9..15, k in {1, 18, 35, 52, 69}, and m in 1..2, with iterations
// set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12656
// n_div_8: n takes the values 16 and 24 (multiples of nr == 8) with m == 2; for each n,
// k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12673
// n_div_8_strided_cn: n in {16, 24} and k in {1, 18, 35, 52, 69} with m == 2 and
// cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12691
// n_div_8_strided_a: n in {16, 24} and k in {1, 18, 35, 52, 69} with m == 2 and
// a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12709
// n_div_8_subtile: n in {16, 24}, k in {1, 18, 35, 52, 69}, and m in 1..2, with
// iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12729
// strided_cm_subtile: k in {1, 18, 35, 52, 69}, n in 1..8, and m in 1..2, each run with
// cm_stride set to 11 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(11).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12750
// qmin: single full-tile run (m == 2, n == 8, k == 16) with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
12764
// qmax: single full-tile run (m == 2, n == 8, k == 16) with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
12778
// strided_cm: single full-tile run (m == 2, n == 8, k == 16) with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
12792 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12793
12794
12795 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_16: single full-tile run (m == 2, n == 8) with k == 16 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12808
// strided_cn: single full-tile run (m == 2, n == 8, k == 16) with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12822
// k_eq_16_strided_a: single full-tile run (m == 2, n == 8, k == 16) with a_stride set
// to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.a_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12836
// k_eq_16_subtile: with k fixed at 16, every (n, m) with n in 1..8 and m in 1..2 is
// tested, with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12854
// k_eq_16_subtile_m: with n == 8 and k == 16 fixed, m runs over 1..2; iterations is set
// to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(m_size).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12870
// k_eq_16_subtile_n: with m == 2 and k == 16 fixed, n runs over 1..8; iterations is set
// to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(n_size).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12886
// k_lt_16: full 2x8 tile for every k in 1..15.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12901
// k_lt_16_strided_a: full 2x8 tile for every k in 1..15, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k_size);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12917
// k_lt_16_subtile: k in 1..15, n in 1..8, and m in 1..2, with iterations set to 1 for
// every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12937
// k_gt_16: full 2x8 tile for every k in 17..31.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12952
// k_gt_16_strided_a: full 2x8 tile for every k in 17..31, with a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k_size);
    tester.a_stride(37);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12968
// k_gt_16_subtile: k in 17..31, n in 1..8, and m in 1..2, with iterations set to 1 for
// every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12988
// k_div_16: full 2x8 tile for each k in 32, 48, ..., 160.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13003
// k_div_16_strided_a: full 2x8 tile for each k in 32, 48, ..., 160, with a_stride set
// to 163.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(k_size);
    tester.a_stride(163);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13019
// k_div_16_subtile: k in 32, 48, ..., 160, n in 1..8, and m in 1..2, with iterations
// set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13039
// n_gt_8: n runs over 9..15 (above nr == 8) with m == 2; for each n, k takes the
// values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13056
// n_gt_8_strided_cn: n in 9..15 and k in {1, 18, 35, 52, 69} with m == 2 and cn_stride
// set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13074
// n_gt_8_strided_a: n in 9..15 and k in {1, 18, 35, 52, 69} with m == 2 and a_stride
// set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13092
// n_gt_8_subtile: n in 9..15, k in {1, 18, 35, 52, 69}, and m in 1..2, with iterations
// set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13112
// n_div_8: n takes the values 16 and 24 (multiples of nr == 8) with m == 2; for each n,
// k takes the values 1, 18, 35, 52, 69.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13129
// n_div_8_strided_cn: n in {16, 24} and k in {1, 18, 35, 52, 69} with m == 2 and
// cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13147
// n_div_8_strided_a: n in {16, 24} and k in {1, 18, 35, 52, 69} with m == 2 and
// a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.a_stride(83);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13165
// n_div_8_subtile: n in {16, 24}, k in {1, 18, 35, 52, 69}, and m in 1..2, with
// iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13185
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k from 1 to 80 by 17 (outermost), n in [1, 8], m in {1, 2}; cm_stride is 11, one iteration each.
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13206
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m == mr, n == nr, k == 16, with qmin set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13220
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m == mr, n == nr, k == 16, with qmax set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13234
TEST(QC8_GEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m == mr, n == nr, k == 16, with cm_stride overridden to 11.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13248 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13249
13250
13251 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr, n == nr, k == 16; no stride overrides.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(4)
    .m(2).n(8).k(16)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13264
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr, n == nr, k == 16, with cn_stride overridden to 11.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(4)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13278
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr, n == nr, k == 16, with a_stride overridden to 19.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(4)
    .m(2).n(8).k(16)
    .a_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13292
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k == 16 with n in [1, 8] (outer) and m in {1, 2} (inner); one iteration each.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13310
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // n == 8 and k == 16 while m takes the values 1 and 2; one iteration each.
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13326
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // m == 2 and k == 16 while n runs from 1 through 8; one iteration each.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13342
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // m == 2, n == 8, k runs from 1 through 15.
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13357
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // m == 2, n == 8, k runs from 1 through 15; a_stride is overridden to 19.
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(kc)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13373
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k from 1 through 15 (outermost), n in [1, 8], m in {1, 2}; one iteration each.
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13393
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // m == 2, n == 8, k runs from 17 through 31.
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13408
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // m == 2, n == 8, k runs from 17 through 31; a_stride is overridden to 37.
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(kc)
      .a_stride(37)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13424
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k from 17 through 31 (outermost), n in [1, 8], m in {1, 2}; one iteration each.
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13444
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // m == 2, n == 8, k steps from 32 to 160 in increments of 16.
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13459
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // m == 2, n == 8, k steps from 32 to 160 by 16; a_stride is overridden to 163.
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(kc)
      .a_stride(163)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13475
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k from 32 to 160 by 16 (outermost), n in [1, 8], m in {1, 2}; one iteration each.
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13495
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // n runs from 9 through 15; k steps from 1 to 80 in increments of 17.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13512
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n runs from 9 through 15; k steps from 1 to 80 by 17; cn_stride is overridden to 11.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13530
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // n runs from 9 through 15; k steps from 1 to 80 by 17; a_stride is overridden to 83.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13548
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n from 9 through 15 (outermost), k from 1 to 80 by 17, m in {1, 2}; one iteration each.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13568
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // n takes the values 16 and 24; k steps from 1 to 80 in increments of 17.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13585
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n takes the values 16 and 24; k steps from 1 to 80 by 17; cn_stride is overridden to 11.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13603
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // n takes the values 16 and 24; k steps from 1 to 80 by 17; a_stride is overridden to 83.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(nc).k(kc)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13621
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24}, k from 1 to 80 by 17, m in {1, 2}; one iteration per configuration.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13641
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k from 1 to 80 by 17 (outermost), n in [1, 8], m in {1, 2}; cm_stride is 11, one iteration each.
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13662
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr, n == nr, k == 16, with qmin set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(4)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13676
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr, n == nr, k == 16, with qmax set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(4)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13690
TEST(QC8_GEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr, n == nr, k == 16, with cm_stride overridden to 11.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(4)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13704 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13705
13706
13707 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr, n == nr, k == 16; no stride overrides.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13720
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr, n == nr, k == 16, with cn_stride overridden to 11.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13734
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr, n == nr, k == 16, with a_stride overridden to 19.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .a_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
13748
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k == 16 with n in [1, 8] (outer) and m in {1, 2} (inner); one iteration each.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13766
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // n == 8 and k == 16 while m takes the values 1 and 2; one iteration each.
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13782
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // m == 2 and k == 16 while n runs from 1 through 8; one iteration each.
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13798
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // m == 2, n == 8, k runs from 1 through 15.
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13813
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // m == 2, n == 8, k runs from 1 through 15; a_stride is overridden to 19.
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13829
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k from 1 through 15 (outermost), n in [1, 8], m in {1, 2}; one iteration each.
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13849
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // m == 2, n == 8, k runs from 17 through 31.
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13864
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // m == 2, n == 8, k runs from 17 through 31; a_stride is overridden to 37.
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .a_stride(37)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13880
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k from 17 through 31 (outermost), n in [1, 8], m in {1, 2}; one iteration each.
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13900
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // m == 2, n == 8, k steps from 32 to 160 in increments of 16.
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13915
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // m == 2, n == 8, k steps from 32 to 160 by 16; a_stride is overridden to 163.
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .a_stride(163)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
13931
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k from 32 to 160 by 16 (outermost), n in [1, 8], m in {1, 2}; one iteration each.
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13951
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // n runs from 9 through 15; k steps from 1 to 80 in increments of 17.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13968
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n runs from 9 through 15; k steps from 1 to 80 by 17; cn_stride is overridden to 11.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13986
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // n runs from 9 through 15; k steps from 1 to 80 by 17; a_stride is overridden to 83.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14004
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n from 9 through 15 (outermost), k from 1 to 80 by 17, m in {1, 2}; one iteration each.
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14024
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // n takes the values 16 and 24; k steps from 1 to 80 in increments of 17.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14041
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n takes the values 16 and 24; k steps from 1 to 80 by 17; cn_stride is overridden to 11.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14059
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // n takes the values 16 and 24; k steps from 1 to 80 by 17; a_stride is overridden to 83.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14077
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24}, k from 1 to 80 by 17, m in {1, 2}; one iteration per configuration.
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14097
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k from 1 to 80 by 17 (outermost), n in [1, 8], m in {1, 2}; cm_stride is 11, one iteration each.
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14118
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr, n == nr, k == 16, with qmin set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
14132
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr, n == nr, k == 16, with qmax set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
14146
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // m == mr, n == nr, k == 16, with cm_stride overridden to 11.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
14160 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14161
14162
14163 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m == mr, n == nr, k == 16; no stride overrides.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14176
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m == mr, n == nr, k == 16, with cn_stride overridden to 11.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14190
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  // m == mr, n == nr, k == 16, with a_stride overridden to 19.
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .a_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14204
// Sweeps n over [1, 8] (outer) and m over [1, 2] (inner) at k == 16,
// with iterations(1) for each shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(cur_m).n(cur_n).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14222
// Sweeps m over [1, 2] with n == nr (8) and k == 16; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(cur_m).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14238
// Sweeps n over [1, 8] with m == mr (2) and k == 16; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(cur_n).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14254
// Sweeps k over [1, 15] with m == mr (2) and n == nr (8).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14269
// Sweeps k over [1, 15] with m == mr (2), n == nr (8) and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(cur_k)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14285
// Sweeps k over [1, 15], n over [1, 8] and m over [1, 2] (innermost),
// with iterations(1) for each shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14305
// Sweeps k over [17, 31] with m == mr (2) and n == nr (8).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14320
// Sweeps k over [17, 31] with m == mr (2), n == nr (8) and a_stride set to 37.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(cur_k)
      .a_stride(37)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14336
// Sweeps k over [17, 31], n over [1, 8] and m over [1, 2] (innermost),
// with iterations(1) for each shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14356
// Sweeps k over 32, 48, ..., 160 (step 16) with m == mr (2) and n == nr (8).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14371
// Sweeps k over 32, 48, ..., 160 (step 16) with m == mr (2), n == nr (8)
// and a_stride set to 163.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(cur_k)
      .a_stride(163)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14387
// Sweeps k over 32, 48, ..., 160 (step 16), n over [1, 8] and m over [1, 2]
// (innermost), with iterations(1) for each shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14407
// Sweeps n over [9, 15]; for each n, k takes 1, 18, 35, 52, 69 (step 17, <= 80).
// m is fixed at mr (2).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14424
// Sweeps n over [9, 15]; for each n, k takes 1, 18, 35, 52, 69 (step 17, <= 80).
// m is fixed at mr (2) and cn_stride is set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14442
// Sweeps n over [9, 15]; for each n, k takes 1, 18, 35, 52, 69 (step 17, <= 80).
// m is fixed at mr (2) and a_stride is set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14460
// Sweeps n over [9, 15], k over 1, 18, 35, 52, 69 (step 17, <= 80) and
// m over [1, 2] (innermost), with iterations(1) for each shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14480
// Runs n == 16 and n == 24; for each n, k takes 1, 18, 35, 52, 69
// (step 17, <= 80). m is fixed at mr (2).
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14497
// Runs n == 16 and n == 24; for each n, k takes 1, 18, 35, 52, 69
// (step 17, <= 80). m is fixed at mr (2) and cn_stride is set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14515
// Runs n == 16 and n == 24; for each n, k takes 1, 18, 35, 52, 69
// (step 17, <= 80). m is fixed at mr (2) and a_stride is set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .a_stride(83)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14533
// Runs n == 16 and n == 24, k over 1, 18, 35, 52, 69 (step 17, <= 80) and
// m over [1, 2] (innermost), with iterations(1) for each shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14553
// Sweeps k over 1, 18, 35, 52, 69 (step 17, <= 80), n over [1, 8] and
// m over [1, 2] (innermost) with cm_stride set to 11; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14574
// m == mr (2), n == nr (8), k == 16, with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14588
// m == mr (2), n == nr (8), k == 16, with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14602
// m == mr (2), n == nr (8), k == 16, with cm_stride explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14616 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14617
14618
14619 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: m == mr (3), n == nr (16), k == 8.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(1).sr(1)
    .m(3).n(16).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
14632
// m == mr (3), n == nr (16), k == 8, with cn_stride explicitly set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(1).sr(1)
    .m(3).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
14646
// m == mr (3), n == nr (16), k == 8, with a_stride explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(1).sr(1)
    .m(3).n(16).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
14660
// Sweeps n over [1, 16] (outer) and m over [1, 3] (inner) at k == 8,
// with iterations(1) for each shape.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14678
// Sweeps m over [1, 3] with n == nr (16) and k == 8; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(cur_m).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
14694
// Sweeps n over [1, 16] with m == mr (3) and k == 8; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(cur_n).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
14710
// Sweeps k over [1, 7] with m == mr (3) and n == nr (16).
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
14725
// Sweeps k over [1, 7] with m == mr (3), n == nr (16) and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(cur_k)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
14741
// Sweeps k over [1, 7], n over [1, 16] and m over [1, 3] (innermost),
// with iterations(1) for each shape.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14761
// Sweeps k over [9, 15] with m == mr (3) and n == nr (16).
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
14776
// Sweeps k over [9, 15] with m == mr (3), n == nr (16) and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(cur_k)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
14792
// Sweeps k over [9, 15], n over [1, 16] and m over [1, 3] (innermost),
// with iterations(1) for each shape.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14812
// Sweeps k over 16, 24, ..., 80 (step 8) with m == mr (3) and n == nr (16).
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
14827
// Sweeps k over 16, 24, ..., 80 (step 8) with m == mr (3), n == nr (16)
// and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(cur_k)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
14843
// Sweeps k over 16, 24, ..., 80 (step 8), n over [1, 16] and m over [1, 3]
// (innermost), with iterations(1) for each shape.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14863
// Sweeps n over [17, 31]; for each n, k takes 1, 10, 19, 28, 37
// (step 9, <= 40). m is fixed at mr (3).
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14880
// Sweeps n over [17, 31]; for each n, k takes 1, 10, 19, 28, 37
// (step 9, <= 40). m is fixed at mr (3) and cn_stride is set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14898
// Sweeps n over [17, 31]; for each n, k takes 1, 10, 19, 28, 37
// (step 9, <= 40). m is fixed at mr (3) and a_stride is set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14916
// Sweeps n over [17, 31], k over 1, 10, 19, 28, 37 (step 9, <= 40) and
// m over [1, 3] (innermost), with iterations(1) for each shape.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 17; cur_n < 32; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14936
// Runs n == 32 and n == 48; for each n, k takes 1, 10, 19, 28, 37
// (step 9, <= 40). m is fixed at mr (3).
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14953
// Runs n == 32 and n == 48; for each n, k takes 1, 10, 19, 28, 37
// (step 9, <= 40). m is fixed at mr (3) and cn_stride is set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14971
// Runs n == 32 and n == 48; for each n, k takes 1, 10, 19, 28, 37
// (step 9, <= 40). m is fixed at mr (3) and a_stride is set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(cur_n).k(cur_k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14989
// Runs n == 32 and n == 48, k over 1, 10, 19, 28, 37 (step 9, <= 40) and
// m over [1, 3] (innermost), with iterations(1) for each shape.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15009
// Sweeps k over 1, 10, 19, 28, 37 (step 9, <= 40), n over [1, 16] and
// m over [1, 3] (innermost) with cm_stride set to 19; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 3; ++cur_m) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15030
// m == mr (3), n == nr (16), k == 8, with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(1).sr(1)
    .m(3).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15044
// m == mr (3), n == nr (16), k == 8, with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(1).sr(1)
    .m(3).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15058
// m == mr (3), n == nr (16), k == 8, with cm_stride explicitly set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X16__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(1).sr(1)
    .m(3).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15072 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15073
15074
15075 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: m == mr (4), n == nr (8), k == 8.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15088
// m == mr (4), n == nr (8), k == 8, with cn_stride explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15102
// m == mr (4), n == nr (8), k == 8, with a_stride explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15116
// Sweeps n over [1, 8] (outer) and m over [1, 4] (inner) at k == 8,
// with iterations(1) for each shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15134
// Sweeps m over [1, 4] with n == nr (8) and k == 8; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(cur_m).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15150
// Sweeps n over [1, 8] with m == mr (4) and k == 8; iterations(1) per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(cur_n).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15166
// Sweeps k over [1, 7] with m == mr (4) and n == nr (8).
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(cur_k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15181
// Sweeps k over [1, 7] with m == mr (4), n == nr (8) and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(cur_k)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15197
// Sweeps k over [1, 7], n over [1, 8] and m over [1, 4] (innermost),
// with iterations(1) for each shape.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15217
// m == 4, n == 8: sweeps k over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
15232
// m == 4, n == 8, a_stride == 19: sweeps k over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
15248
// Sweeps k over 9..15, n over 1..8 and m over 1..4 (innermost),
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15268
// m == 4, n == 8: sweeps k over 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
15283
// m == 4, n == 8, a_stride == 83: sweeps k over 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
15299
// Sweeps k over 16, 24, ..., 80, n over 1..8 and m over 1..4 (innermost),
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15319
// m == 4: sweeps n over 9..15 and, inside it, k over 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15336
// m == 4, cn_stride == 11: sweeps n over 9..15 and, inside it,
// k over 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15354
// m == 4, a_stride == 43: sweeps n over 9..15 and, inside it,
// k over 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15372
// Sweeps n over 9..15, k over 1, 10, ..., 37 and m over 1..4 (innermost),
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15392
// m == 4: runs n == 16 and n == 24, each with k over 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15409
// m == 4, cn_stride == 11: runs n == 16 and n == 24, each with
// k over 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15427
// m == 4, a_stride == 43: runs n == 16 and n == 24, each with
// k over 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15445
// Runs n == 16 and n == 24, k over 1, 10, ..., 37 and m over 1..4
// (innermost), one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15465
// cm_stride == 11: sweeps k over 1, 10, ..., 37, n over 1..8 and m over 1..4
// (innermost), one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15486
// Single m == 4, n == 8, k == 8 run with the tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
15500
// Single m == 4, n == 8, k == 8 run with the tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
15514
// Single m == 4, n == 8, k == 8 run with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
15528 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15529
15530
15531 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single m == 4, n == 8, k == 8 run of the 4x8 NEONv8 MLAL-lane kernel.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
15544
// Single m == 4, n == 8, k == 8 run with cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cn_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
15558
// Single m == 4, n == 8, k == 8 run with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
15572
// k == 8: sweeps n over 1..8 and, inside it, m over 1..4,
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15590
// k == 8, n == 8: sweeps m over 1..4, one tester iteration per size.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(rows).n(8).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
15606
// k == 8, m == 4: sweeps n over 1..8, one tester iteration per size.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
15622
// m == 4, n == 8: sweeps k over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
15637
// m == 4, n == 8, a_stride == 11: sweeps k over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
15653
// Sweeps k over 1..7, n over 1..8 and m over 1..4 (innermost),
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15673
// m == 4, n == 8: sweeps k over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
15688
// m == 4, n == 8, a_stride == 19: sweeps k over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
15704
// Sweeps k over 9..15, n over 1..8 and m over 1..4 (innermost),
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15724
// m == 4, n == 8: sweeps k over 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
15739
// m == 4, n == 8, a_stride == 83: sweeps k over 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
15755
// Sweeps k over 16, 24, ..., 80, n over 1..8 and m over 1..4 (innermost),
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15775
// m == 4: sweeps n over 9..15 and, inside it, k over 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15792
// m == 4, cn_stride == 11: sweeps n over 9..15 and, inside it,
// k over 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15810
// m == 4, a_stride == 43: sweeps n over 9..15 and, inside it,
// k over 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15828
// Sweeps n over 9..15, k over 1, 10, ..., 37 and m over 1..4 (innermost),
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15848
// m == 4: runs n == 16 and n == 24, each with k over 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15865
// m == 4, cn_stride == 11: runs n == 16 and n == 24, each with
// k over 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(11)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15883
// m == 4, a_stride == 43: runs n == 16 and n == 24, each with
// k over 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
15901
// Runs n == 16 and n == 24, k over 1, 10, ..., 37 and m over 1..4
// (innermost), one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15921
// cm_stride == 11: sweeps k over 1, 10, ..., 37, n over 1..8 and m over 1..4
// (innermost), one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
15942
// Single m == 4, n == 8, k == 8 run with the tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
15956
// Single m == 4, n == 8, k == 8 run with the tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
15970
// Single m == 4, n == 8, k == 8 run with cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X8__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
15984 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15985
15986
15987 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single m == 4, n == 16, k == 8 run of the 4x16 NEON MLAL-lane kernel.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
16000
// Single m == 4, n == 16, k == 8 run with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cn_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
16014
// Single m == 4, n == 16, k == 8 run with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
16028
// k == 8: sweeps n over 1..16 and, inside it, m over 1..4,
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
16046
// k == 8, n == 16: sweeps m over 1..4, one tester iteration per size.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(rows).n(16).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
16062
// k == 8, m == 4: sweeps n over 1..16, one tester iteration per size.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
16078
// m == 4, n == 16: sweeps k over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
16093
// m == 4, n == 16, a_stride == 11: sweeps k over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(depth)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
16109
// Sweeps k over 1..7, n over 1..16 and m over 1..4 (innermost),
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16129
// m == 4, n == 16: sweeps k over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
16144
// m == 4, n == 16, a_stride == 19: sweeps k over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(depth)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
16160
// Sweeps k over 9..15, n over 1..16 and m over 1..4 (innermost),
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16180
// m == 4, n == 16: sweeps k over 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
16195
// m == 4, n == 16, a_stride == 83: sweeps k over 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(depth)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
16211
// Sweeps k over 16, 24, ..., 80, n over 1..16 and m over 1..4 (innermost),
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
16231
// Runs m = 4 with every n in [17, 31] and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16248
// Runs m = 4 with every n in [17, 31] and k = 1, 10, 19, 28, 37,
// using cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16266
// Runs m = 4 with every n in [17, 31] and k = 1, 10, 19, 28, 37,
// using a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16284
// Runs every n in [17, 31], k = 1, 10, 19, 28, 37 and m in [1, 4],
// with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16304
// Runs m = 4 with n = 32 and 48, and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16321
// Runs m = 4 with n = 32 and 48, and k = 1, 10, 19, 28, 37,
// using cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16339
// Runs m = 4 with n = 32 and 48, and k = 1, 10, 19, 28, 37,
// using a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16357
// Runs n = 32 and 48, k = 1, 10, 19, 28, 37 and every m in [1, 4],
// with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16377
// Runs k = 1, 10, 19, 28, 37 against every n in [1, 16] and m in [1, 4],
// using cm_stride(19) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16398
// Runs the full 4x16 tile at k = 8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16412
// Runs the full 4x16 tile at k = 8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16426
// Runs the full 4x16 tile at k = 8 with cm_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16440 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16441
16442
16443 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the full 4x16 tile of the neon_mlal_lane_prfm kernel at k = 8.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16456
// Runs the full 4x16 tile at k = 8 with cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16470
// Runs the full 4x16 tile at k = 8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16484
// Runs k = 8 against every n in [1, 16] and m in [1, 4],
// with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16502
// Runs k = 8, n = 16 for every m in [1, 4], with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(m_dim).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16518
// Runs k = 8, m = 4 for every n in [1, 16], with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16534
// Runs the full 4x16 tile for every k in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16549
// Runs the full 4x16 tile for every k in [1, 7] with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16565
// Runs every k in [1, 7], n in [1, 16] and m in [1, 4],
// with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16585
// Runs the full 4x16 tile for every k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16600
// Runs the full 4x16 tile for every k in [9, 15] with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16616
// Runs every k in [9, 15], n in [1, 16] and m in [1, 4],
// with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16636
// Runs the full 4x16 tile for k = 16, 24, ..., 80 (steps of 8).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16651
// Runs the full 4x16 tile for k = 16, 24, ..., 80 with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16667
// Runs k = 16, 24, ..., 80 against every n in [1, 16] and m in [1, 4],
// with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16687
// Runs m = 4 with every n in [17, 31] and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16704
// Runs m = 4 with every n in [17, 31] and k = 1, 10, 19, 28, 37,
// using cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16722
// Runs m = 4 with every n in [17, 31] and k = 1, 10, 19, 28, 37,
// using a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16740
// Runs every n in [17, 31], k = 1, 10, 19, 28, 37 and m in [1, 4],
// with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16760
// Runs m = 4 with n = 32 and 48, and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16777
// Runs m = 4 with n = 32 and 48, and k = 1, 10, 19, 28, 37,
// using cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16795
// Runs m = 4 with n = 32 and 48, and k = 1, 10, 19, 28, 37,
// using a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16813
// Runs n = 32 and 48, k = 1, 10, 19, 28, 37 and every m in [1, 4],
// with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16833
// Runs k = 1, 10, 19, 28, 37 against every n in [1, 16] and m in [1, 4],
// using cm_stride(19) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16854
// Runs the full 4x16 tile at k = 8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16868
// Runs the full 4x16 tile at k = 8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16882
// Runs the full 4x16 tile at k = 8 with cm_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16896 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16897
16898
16899 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the full 4x16 tile of the neonv8_mlal_lane kernel at k = 8.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16912
// Runs the full 4x16 tile at k = 8 with cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16926
// Runs the full 4x16 tile at k = 8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16940
// Runs k = 8 against every n in [1, 16] and m in [1, 4],
// with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16958
// Runs k = 8, n = 16 for every m in [1, 4], with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(m_dim).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16974
// Runs k = 8, m = 4 for every n in [1, 16], with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16990
// Runs the full 4x16 tile for every k in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17005
// Runs the full 4x16 tile for every k in [1, 7] with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17021
// Runs every k in [1, 7], n in [1, 16] and m in [1, 4],
// with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17041
// Runs the full 4x16 tile for every k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17056
// Runs the full 4x16 tile for every k in [9, 15] with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17072
// Runs every k in [9, 15], n in [1, 16] and m in [1, 4],
// with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17092
// Runs the full 4x16 tile for k = 16, 24, ..., 80 (steps of 8).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17107
// Runs the full 4x16 tile for k = 16, 24, ..., 80 with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_dim)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17123
// Runs k = 16, 24, ..., 80 against every n in [1, 16] and m in [1, 4],
// with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17143
// Runs m = 4 with every n in [17, 31] and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17160
// Runs m = 4 with every n in [17, 31] and k = 1, 10, 19, 28, 37,
// using cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17178
// Runs m = 4 with every n in [17, 31] and k = 1, 10, 19, 28, 37,
// using a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17196
// Runs every n in [17, 31], k = 1, 10, 19, 28, 37 and m in [1, 4],
// with iterations(1) per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17216
// Runs m = 4 with n = 32 and 48, and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17233
// Sweeps n over 32, 48 and k over 1, 10, 19, 28, 37 at m = 4 with cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .cn_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17251
// Sweeps n over 32, 48 and k over 1, 10, 19, 28, 37 at m = 4 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17269
// Sweeps n over 32, 48, k over 1, 10, 19, 28, 37 and m over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
17289
// Sweeps k over 1, 10, 19, 28, 37, n over 1..16 and m over 1..4 with cm_stride(19),
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
17310
// Single case: m = 4, n = 16, k = 8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
17324
// Single case: m = 4, n = 16, k = 8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
17338
// Single case: m = 4, n = 16, k = 8 with cm_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
17352 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
17353
17354
17355 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Single case: m = 4, n = 16, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
17368
// Single case: m = 4, n = 16, k = 8 with cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
17382
// Single case: m = 4, n = 16, k = 8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
17396
// Sweeps n over 1..16 and m over 1..4 at k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17414
// Sweeps m over 1..4 at n = 16, k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(m_val).n(16).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
17430
// Sweeps n over 1..16 at m = 4, k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(n_val).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
17446
// Sweeps k over 1..7 at m = 4, n = 16.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_val)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
17461
// Sweeps k over 1..7 at m = 4, n = 16 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_val)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
17477
// Sweeps k over 1..7, n over 1..16 and m over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
17497
// Sweeps k over 9..15 at m = 4, n = 16.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_val)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
17512
// Sweeps k over 9..15 at m = 4, n = 16 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_val)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
17528
// Sweeps k over 9..15, n over 1..16 and m over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
17548
// Sweeps k over 16, 24, ..., 80 at m = 4, n = 16.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_val)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
17563
// Sweeps k over 16, 24, ..., 80 at m = 4, n = 16 with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k_val)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
17579
// Sweeps k over 16, 24, ..., 80, n over 1..16 and m over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
17599
// Sweeps n over 17..31 and k over 1, 10, 19, 28, 37 at m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_val).k(k_val)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17616
// Sweeps n over 17..31 and k over 1, 10, 19, 28, 37 at m = 4 with cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_val).k(k_val)
        .cn_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17634
// Sweeps n over 17..31 and k over 1, 10, 19, 28, 37 at m = 4 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_val).k(k_val)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17652
// Sweeps n over 17..31, k over 1, 10, 19, 28, 37 and m over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
17672
// Sweeps n over 32, 48 and k over 1, 10, 19, 28, 37 at m = 4.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_val).k(k_val)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17689
// Sweeps n over 32, 48 and k over 1, 10, 19, 28, 37 at m = 4 with cn_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_val).k(k_val)
        .cn_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17707
// Sweeps n over 32, 48 and k over 1, 10, 19, 28, 37 at m = 4 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n_val).k(k_val)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17725
// Sweeps n over 32, 48, k over 1, 10, 19, 28, 37 and m over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
17745
// Sweeps k over 1, 10, 19, 28, 37, n over 1..16 and m over 1..4 with cm_stride(19),
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
17766
// Single case: m = 4, n = 16, k = 8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
17780
// Single case: m = 4, n = 16, k = 8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
17794
// Single case: m = 4, n = 16, k = 8 with cm_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(4).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c4__neondot,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
17808 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
17809
17810
17811 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: m = 6, n = 8, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
17824
// Single case: m = 6, n = 8, k = 8 with cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
17838
// Single case: m = 6, n = 8, k = 8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .a_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
17852
// Sweeps n over 1..8 and m over 1..6 at k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
17870
// Sweeps m over 1..6 at n = 8, k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_val).n(8).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
17886
// Sweeps n over 1..8 at m = 6, k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_val).k(8)
      .iterations(1)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
17902
// Sweeps k over 1..7 at m = 6, n = 8.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
17917
// Sweeps k over 1..7 at m = 6, n = 8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
17933
// Sweeps k over 1..7, n over 1..8 and m over 1..6, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
17953
// Sweeps k over 9..15 at m = 6, n = 8.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
17968
// Sweeps k over 9..15 at m = 6, n = 8 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .a_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
17984
// Sweeps k over 9..15, n over 1..8 and m over 1..6, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18004
// Sweeps k over 16, 24, ..., 80 at m = 6, n = 8.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
18019
// Sweeps k over 16, 24, ..., 80 at m = 6, n = 8 with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_val)
      .a_stride(83)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
18035
// Sweeps k over 16, 24, ..., 80, n over 1..8 and m over 1..6, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18055
// Sweeps n over 9..15 and k over 1, 10, 19, 28, 37 at m = 6.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18072
// Sweeps n over 9..15 and k over 1, 10, 19, 28, 37 at m = 6 with cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18090
// Sweeps n over 9..15 and k over 1, 10, 19, 28, 37 at m = 6 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18108
// Sweeps n over 9..15, k over 1, 10, 19, 28, 37 and m over 1..6, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val <= 15; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18128
// Sweeps n over 16, 24 and k over 1, 10, 19, 28, 37 at m = 6.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18145
// Sweeps n over 16, 24 and k over 1, 10, 19, 28, 37 at m = 6 with cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18163
// Sweeps n over 16, 24 and k over 1, 10, 19, 28, 37 at m = 6 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_val).k(k_val)
        .a_stride(43)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
18181
// Sweeps n over 16, 24, k over 1, 10, 19, 28, 37 and m over 1..6, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18201
// Sweeps k over 1, 10, 19, 28, 37, n over 1..8 and m over 1..6 with cm_stride(11),
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 6; ++m_val) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
18222
// Single case: m = 6, n = 8, k = 8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .qmin(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
18236
// Single case: m = 6, n = 8, k = 8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .qmax(128)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
18250
// Single case: m = 6, n = 8, k = 8 with cm_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_6X8__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .cm_stride(11)
    .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
18264 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
18265
18266
18267 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Full 6x8 tile (m == mr, n == nr) at k = 8.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
18280
// Full 6x8 tile at k = 8 with the tester's cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
18294
// Full 6x8 tile at k = 8 with the tester's a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
18308
// Sweeps every (m, n) with m in 1..6 and n in 1..8 at k = 8; one iteration
// per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 6; m++) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18326
// Sweeps m over 1..6 with n = 8 and k = 8; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m = 1; m <= 6; m++) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(m).n(8).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
18342
// Sweeps n over 1..8 with m = 6 and k = 8; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(n).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
18358
// Sweeps k over 1..7 on a full 6x8 tile.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
18373
// Sweeps k over 1..7 on a full 6x8 tile with the tester's a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(k)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
18389
// Sweeps k over 1..7, n over 1..8 and m over 1..6; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18409
// Sweeps k over 9..15 on a full 6x8 tile.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
18424
// Sweeps k over 9..15 on a full 6x8 tile with the tester's a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(k)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
18440
// Sweeps k over 9..15, n over 1..8 and m over 1..6; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18460
// Sweeps k over the multiples of 8 from 16 to 80 on a full 6x8 tile.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
18475
// Sweeps k over the multiples of 8 from 16 to 80 on a full 6x8 tile with the
// tester's a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
18491
// Sweeps k over the multiples of 8 from 16 to 80, n over 1..8 and m over 1..6;
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18511
// Sweeps n over 9..15 and k over {1, 10, 19, 28, 37} with m = 6.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n).k(k)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18528
// Sweeps n over 9..15 and k over {1, 10, 19, 28, 37} with m = 6 and the
// tester's cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18546
// Sweeps n over 9..15 and k over {1, 10, 19, 28, 37} with m = 6 and the
// tester's a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n).k(k)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18564
// Sweeps n over 9..15, k over {1, 10, 19, 28, 37} and m over 1..6; one
// iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18584
// Sweeps n over {16, 24} and k over {1, 10, 19, 28, 37} with m = 6.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n).k(k)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18601
// Sweeps n over {16, 24} and k over {1, 10, 19, 28, 37} with m = 6 and the
// tester's cn_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18619
// Sweeps n over {16, 24} and k over {1, 10, 19, 28, 37} with m = 6 and the
// tester's a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n).k(k)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18637
// Sweeps n over {16, 24}, k over {1, 10, 19, 28, 37} and m over 1..6; one
// iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18657
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..8 and m over 1..6 with the
// tester's cm_stride set to 11; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18678
// Full 6x8 tile at k = 8 with the tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
18692
// Full 6x8 tile at k = 8 with the tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
18706
// Full 6x8 tile at k = 8 with the tester's cm_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
18720 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
18721
18722
18723 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 6x16 tile (m == mr, n == nr) at k = 8.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
18736
// Full 6x16 tile at k = 8 with the tester's cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
18750
// Full 6x16 tile at k = 8 with the tester's a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
18764
// Sweeps every (m, n) with m in 1..6 and n in 1..16 at k = 8; one iteration
// per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 6; m++) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18782
// Sweeps m over 1..6 with n = 16 and k = 8; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 6; m++) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(m).n(16).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
18798
// Sweeps n over 1..16 with m = 6 and k = 8; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
18814
// Sweeps k over 1..7 on a full 6x16 tile.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
18829
// Sweeps k over 1..7 on a full 6x16 tile with the tester's a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
18845
// Sweeps k over 1..7, n over 1..16 and m over 1..6; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18865
// Sweeps k over 9..15 on a full 6x16 tile.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
18880
// Sweeps k over 9..15 on a full 6x16 tile with the tester's a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
18896
// Sweeps k over 9..15, n over 1..16 and m over 1..6; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18916
// Sweeps k over the multiples of 8 from 16 to 80 on a full 6x16 tile.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
18931
// Sweeps k over the multiples of 8 from 16 to 80 on a full 6x16 tile with the
// tester's a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
18947
// Sweeps k over the multiples of 8 from 16 to 80, n over 1..16 and m over
// 1..6; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18967
// Sweeps n over 17..31 and k over {1, 10, 19, 28, 37} with m = 6.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n).k(k)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18984
// Sweeps n over 17..31 and k over {1, 10, 19, 28, 37} with m = 6 and the
// tester's cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n).k(k)
          .cn_stride(19)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19002
// Sweeps n over 17..31 and k over {1, 10, 19, 28, 37} with m = 6 and the
// tester's a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n).k(k)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19020
// Sweeps n over 17..31, k over {1, 10, 19, 28, 37} and m over 1..6; one
// iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
19040
// Sweeps n over {32, 48} and k over {1, 10, 19, 28, 37} with m = 6.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n).k(k)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19057
// Sweeps n over {32, 48} and k over {1, 10, 19, 28, 37} with m = 6 and the
// tester's cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n).k(k)
          .cn_stride(19)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19075
// Sweeps n over {32, 48} and k over {1, 10, 19, 28, 37} with m = 6 and the
// tester's a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n).k(k)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19093
// Sweeps n over {32, 48}, k over {1, 10, 19, 28, 37} and m over 1..6; one
// iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
19113
// Sweeps k over {1, 10, 19, 28, 37}, n over 1..16 and m over 1..6 with the
// tester's cm_stride set to 19; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
19134
// Full 6x16 tile at k = 8 with the tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
19148
// Full 6x16 tile at k = 8 with the tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
19162
// Full 6x16 tile at k = 8 with the tester's cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
19176 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
19177
19178
19179 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 6x16 tile (m == mr, n == nr) at k = 8.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
19192
// Full 6x16 tile at k = 8 with the tester's cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
19206
// Full 6x16 tile at k = 8 with the tester's a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
19220
// Sweeps every (m, n) with m in 1..6 and n in 1..16 at k = 8; one iteration
// per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 6; m++) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19238
// Sweeps m over 1..6 with n = 16 and k = 8; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 6; m++) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(m).n(16).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
19254
// Sweeps n over 1..16 with m = 6 and k = 8; one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
19270
// Sweeps k over 1..7 on a full 6x16 tile.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
19285
// Full 6x16 problem with k from 1 to 7 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(kc)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
19301
// For k from 1 to 7: runs once for every n in [1, 16] and m in [1, 6].
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19321
// Full 6x16 problem with k taking each value from 9 to 15.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
19336
// Full 6x16 problem with k from 9 to 15 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(kc)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
19352
// For k from 9 to 15: runs once for every n in [1, 16] and m in [1, 6].
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19372
// Full 6x16 problem with k = 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
19387
// Full 6x16 problem with k = 16, 24, ..., 80 (step 8) and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(kc)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
19403
// For k = 16, 24, ..., 80: runs once for every n in [1, 16] and m in [1, 6].
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19423
// m fixed at 6; n sweeps 17 through 31, and for each n, k = 1, 10, ..., 37 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19440
// Same n (17..31) and k (1..40 step 9) sweep as n_gt_16, with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(19)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19458
// Same n (17..31) and k (1..40 step 9) sweep as n_gt_16, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19476
// n sweeps 17..31, k sweeps 1..40 in steps of 9, and m sweeps 1..6 innermost.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19496
// m fixed at 6; n takes 32 and 48, and for each n, k = 1, 10, ..., 37 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19513
// n takes 32 and 48, k sweeps 1..40 in steps of 9, with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(19)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19531
// n takes 32 and 48, k sweeps 1..40 in steps of 9, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19549
// n takes 32 and 48, k sweeps 1..40 in steps of 9, and m sweeps 1..6 innermost.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19569
// k sweeps 1..40 in steps of 9; every n in [1, 16] and m in [1, 6] runs with cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19590
// Single 6x16 run with k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
19604
// Single 6x16 run with k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
19618
// Single 6x16 run with k = 8 and cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .cm_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
19632 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
19633
19634
19635 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single 6x16 run with k = 8.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
19648
// Single 6x16 run with k = 8 and cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .cn_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
19662
// Single 6x16 run with k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
19676
// k fixed at 8; runs once for every (n, m) pair with n in [1, 16] and m in [1, 6].
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19694
// k fixed at 8 and n fixed at 16; m sweeps 1 through 6.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(mc).n(16).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
19710
// k fixed at 8 and m fixed at 6; n sweeps 1 through 16.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
19726
// Full 6x16 problem with k taking each value from 1 to 7.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
19741
// Full 6x16 problem with k from 1 to 7 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(kc)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
19757
// For k from 1 to 7: runs once for every n in [1, 16] and m in [1, 6].
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19777
// Full 6x16 problem with k taking each value from 9 to 15.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
19792
// Full 6x16 problem with k from 9 to 15 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(kc)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
19808
// For k from 9 to 15: runs once for every n in [1, 16] and m in [1, 6].
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19828
// Full 6x16 problem with k = 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
19843
// Full 6x16 problem with k = 16, 24, ..., 80 (step 8) and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(kc)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
19859
// For k = 16, 24, ..., 80: runs once for every n in [1, 16] and m in [1, 6].
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19879
// m fixed at 6; n sweeps 17 through 31, and for each n, k = 1, 10, ..., 37 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19896
// Same n (17..31) and k (1..40 step 9) sweep as n_gt_16, with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(19)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19914
// Same n (17..31) and k (1..40 step 9) sweep as n_gt_16, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19932
// n sweeps 17..31, k sweeps 1..40 in steps of 9, and m sweeps 1..6 innermost.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
19952
// m fixed at 6; n takes 32 and 48, and for each n, k = 1, 10, ..., 37 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19969
// n takes 32 and 48, k sweeps 1..40 in steps of 9, with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .cn_stride(19)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
19987
// n takes 32 and 48, k sweeps 1..40 in steps of 9, with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
20005
// n takes 32 and 48, k sweeps 1..40 in steps of 9, and m sweeps 1..6 innermost.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
20025
// k sweeps 1..40 in steps of 9; every n in [1, 16] and m in [1, 6] runs with cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
20046
// Single 6x16 run with k = 8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
20060
// Single 6x16 run with k = 8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
20074
// Single 6x16 run with k = 8 and cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .cm_stride(19)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
20088 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
20089
20090
20091 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single 1x4 run with k = 8.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
20104
// Single 1x4 run with k = 8 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
20118
// Single 1x4 run with k = 8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
20132
// k fixed at 8; n sweeps 1 through 4 and the inner m loop covers only m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
20150
// k fixed at 8 and n fixed at 4; the m loop covers only m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20166
// k fixed at 8 and m fixed at 1; n sweeps 1 through 4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20182
// Full 1x4 problem with k taking each value from 1 to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20197
// Full 1x4 problem with k from 1 to 7 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(kc)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20213
// For k from 1 to 7: n sweeps 1 through 4 and the inner m loop covers only m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
20233
// Full 1x4 problem with k taking each value from 9 to 15.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20248
// Full 1x4 problem with k from 9 to 15 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(kc)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
20264
// For k from 9 to 15: n sweeps 1 through 4 and the inner m loop covers only m = 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
20284
// Full 1x4 tile with k taking the multiples of 8 from 16 through 80 on the
// 1x4c2 sse41_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20299
// Full 1x4 tile with k taking the multiples of 8 from 16 through 80 and
// a_stride(83) on the 1x4c2 sse41_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20315
// Subtile sweep for k in multiples of 8 (16 through 80): each n in [1, 4] and
// m in [1, 1] is passed to the 1x4c2 sse41_ld64 kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20335
// n swept over [5, 8) with m = 1; for each n, k steps from 1 to 40 in
// increments of 9 on the 1x4c2 sse41_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20352
// n swept over [5, 8) with m = 1 and cn_stride(7); for each n, k steps from 1
// to 40 in increments of 9 on the 1x4c2 sse41_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20370
// n swept over [5, 8) with m = 1 and a_stride(43); for each n, k steps from 1
// to 40 in increments of 9 on the 1x4c2 sse41_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20388
// Subtile sweep for n in [5, 8): k steps from 1 to 40 in increments of 9 and
// m covers [1, 1]; each shape uses iterations(1) on the 1x4c2 sse41_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20408
// n takes the values 8 and 12 with m = 1; for each n, k steps from 1 to 40 in
// increments of 9 on the 1x4c2 sse41_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20425
// n takes the values 8 and 12 with m = 1 and cn_stride(7); for each n, k steps
// from 1 to 40 in increments of 9 on the 1x4c2 sse41_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20443
// n takes the values 8 and 12 with m = 1 and a_stride(43); for each n, k steps
// from 1 to 40 in increments of 9 on the 1x4c2 sse41_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20461
// Subtile sweep for n in {8, 12}: k steps from 1 to 40 in increments of 9 and
// m covers [1, 1]; each shape uses iterations(1) on the 1x4c2 sse41_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20481
// Subtile sweep with cm_stride(7): k steps from 1 to 40 in increments of 9,
// n covers [1, 4] and m covers [1, 1]; each shape uses iterations(1) on the
// 1x4c2 sse41_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20502
// Single full tile (m = 1, n = 4, k = 8) with qmin(128) on the 1x4c2
// sse41_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20516
// Single full tile (m = 1, n = 4, k = 8) with qmax(128) on the 1x4c2
// sse41_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20530
// Single full tile (m = 1, n = 4, k = 8) with cm_stride(7) on the 1x4c2
// sse41_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20544 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20545
20546
20547 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full tile (m = 2, n = 4, k = 8) on the 2x4c2 sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20560
// Single full tile (m = 2, n = 4, k = 8) with cn_stride(7) on the 2x4c2
// sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20574
// Single full tile (m = 2, n = 4, k = 8) with a_stride(11) on the 2x4c2
// sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20588
// Subtile sweep at k = 8: each n in [1, 4] and m in [1, 2] is passed to the
// 2x4c2 sse2_ld64 kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20606
// Row-only subtile sweep at k = 8, n = 4: each m in [1, 2] is passed to the
// 2x4c2 sse2_ld64 kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20622
// Column-only subtile sweep at k = 8, m = 2: each n in [1, 4] is passed to the
// 2x4c2 sse2_ld64 kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20638
// Full 2x4 tile (m = 2, n = 4) with k swept over [1, 8) on the 2x4c2
// sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20653
// Full 2x4 tile with k swept over [1, 8) and a_stride(11) on the 2x4c2
// sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20669
// Subtile sweep for k below 8: each k in [1, 8), n in [1, 4] and m in [1, 2]
// is passed to the 2x4c2 sse2_ld64 kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20689
// Full 2x4 tile (m = 2, n = 4) with k swept over [9, 16) on the 2x4c2
// sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20704
// Full 2x4 tile with k swept over [9, 16) and a_stride(19) on the 2x4c2
// sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20720
// Subtile sweep for k above 8: each k in [9, 16), n in [1, 4] and m in [1, 2]
// is passed to the 2x4c2 sse2_ld64 kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20740
// Full 2x4 tile with k taking the multiples of 8 from 16 through 80 on the
// 2x4c2 sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20755
// Full 2x4 tile with k taking the multiples of 8 from 16 through 80 and
// a_stride(83) on the 2x4c2 sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20771
// Subtile sweep for k in multiples of 8 (16 through 80): each n in [1, 4] and
// m in [1, 2] is passed to the 2x4c2 sse2_ld64 kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20791
// n swept over [5, 8) with m = 2; for each n, k steps from 1 to 40 in
// increments of 9 on the 2x4c2 sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20808
// n swept over [5, 8) with m = 2 and cn_stride(7); for each n, k steps from 1
// to 40 in increments of 9 on the 2x4c2 sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20826
// n swept over [5, 8) with m = 2 and a_stride(43); for each n, k steps from 1
// to 40 in increments of 9 on the 2x4c2 sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20844
// Subtile sweep for n in [5, 8): k steps from 1 to 40 in increments of 9 and
// m covers [1, 2]; each shape uses iterations(1) on the 2x4c2 sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20864
// n takes the values 8 and 12 with m = 2; for each n, k steps from 1 to 40 in
// increments of 9 on the 2x4c2 sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20881
// n takes the values 8 and 12 with m = 2 and cn_stride(7); for each n, k steps
// from 1 to 40 in increments of 9 on the 2x4c2 sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20899
// n takes the values 8 and 12 with m = 2 and a_stride(43); for each n, k steps
// from 1 to 40 in increments of 9 on the 2x4c2 sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20917
// Subtile sweep for n in {8, 12}: k steps from 1 to 40 in increments of 9 and
// m covers [1, 2]; each shape uses iterations(1) on the 2x4c2 sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20937
// Subtile sweep with cm_stride(7): k steps from 1 to 40 in increments of 9,
// n covers [1, 4] and m covers [1, 2]; each shape uses iterations(1) on the
// 2x4c2 sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20958
// Single full tile (m = 2, n = 4, k = 8) with qmin(128) on the 2x4c2
// sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20972
// Single full tile (m = 2, n = 4, k = 8) with qmax(128) on the 2x4c2
// sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20986
// Single full tile (m = 2, n = 4, k = 8) with cm_stride(7) on the 2x4c2
// sse2_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
21000 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21001
21002
21003 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full tile (m = 1, n = 4, k = 8) on the 1x4c2 xop_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21016
// Single full tile (m = 1, n = 4, k = 8) with cn_stride(7) on the 1x4c2
// xop_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21030
// Single full tile (m = 1, n = 4, k = 8) with a_stride(11) on the 1x4c2
// xop_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21044
// Subtile sweep at k = 8: each n in [1, 4] and m in [1, 1] is passed to the
// 1x4c2 xop_ld64 kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21062
// Row-only subtile sweep at k = 8, n = 4: each m in [1, 1] is passed to the
// 1x4c2 xop_ld64 kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21078
// Column-only subtile sweep at k = 8, m = 1: each n in [1, 4] is passed to the
// 1x4c2 xop_ld64 kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21094
// Full 1x4 tile (m = 1, n = 4) with k swept over [1, 8) on the 1x4c2
// xop_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21109
// Full 1x4 tile with k swept over [1, 8) and a_stride(11) on the 1x4c2
// xop_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21125
// Subtile sweep for k below 8: each k in [1, 8), n in [1, 4] and m in [1, 1]
// is passed to the 1x4c2 xop_ld64 kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21145
// Full 1x4 tile (m = 1, n = 4) with k swept over [9, 16) on the 1x4c2
// xop_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21160
// Full 1x4 tile with k swept over [9, 16) and a_stride(19) on the 1x4c2
// xop_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21176
// Subtile sweep for k above 8: each k in [9, 16), n in [1, 4] and m in [1, 1]
// is passed to the 1x4c2 xop_ld64 kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21196
// Full 1x4 tile with k taking the multiples of 8 from 16 through 80 on the
// 1x4c2 xop_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21211
// Full 1x4 tile with k taking the multiples of 8 from 16 through 80 and
// a_stride(83) on the 1x4c2 xop_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21227
// Subtile sweep for k in multiples of 8 (16 through 80): each n in [1, 4] and
// m in [1, 1] is passed to the 1x4c2 xop_ld64 kernel with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21247
// n swept over [5, 8) with m = 1; for each n, k steps from 1 to 40 in
// increments of 9 on the 1x4c2 xop_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21264
// n swept over [5, 8) with m = 1 and cn_stride(7); for each n, k steps from 1
// to 40 in increments of 9 on the 1x4c2 xop_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21282
// n swept over [5, 8) with m = 1 and a_stride(43); for each n, k steps from 1
// to 40 in increments of 9 on the 1x4c2 xop_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21300
// Subtile sweep for n in [5, 8): k steps from 1 to 40 in increments of 9 and
// m covers [1, 1]; each shape uses iterations(1) on the 1x4c2 xop_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21320
// n takes the values 8 and 12 with m = 1; for each n, k steps from 1 to 40 in
// increments of 9 on the 1x4c2 xop_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21337
// n takes the values 8 and 12 with m = 1 and cn_stride(7); for each n, k steps
// from 1 to 40 in increments of 9 on the 1x4c2 xop_ld64 kernel.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21355
// 1x4c2 XOP (ld64): n = 8 and 12 crossed with k = 1, 10, 19, 28, 37, using a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21373
// 1x4c2 XOP (ld64): n = 8 and 12, k = 1, 10, 19, 28, 37, m = 1..1, each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21393
// 1x4c2 XOP (ld64): k = 1, 10, 19, 28, 37, n = 1..4, m = 1..1, with cm_stride(7) and
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21414
// 1x4c2 XOP (ld64): single m = 1, n = 4, k = 8 run with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21428
// 1x4c2 XOP (ld64): single m = 1, n = 4, k = 8 run with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21442
// 1x4c2 XOP (ld64): single m = 1, n = 4, k = 8 run with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21456 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21457
21458
21459 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c2 AVX (ld64): single run with m = 2, n = 4, k = 8 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21472
// 2x4c2 AVX (ld64): single m = 2, n = 4, k = 8 run with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21486
// 2x4c2 AVX (ld64): single m = 2, n = 4, k = 8 run with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21500
// 2x4c2 AVX (ld64): k = 8 with n = 1..4 crossed with m = 1..2, each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21518
// 2x4c2 AVX (ld64): n = 4, k = 8 with m = 1..2, each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21534
// 2x4c2 AVX (ld64): m = 2, k = 8 with n = 1..4, each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21550
// 2x4c2 AVX (ld64): m = 2, n = 4 with k = 1..7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21565
// 2x4c2 AVX (ld64): m = 2, n = 4 with k = 1..7, using a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21581
// 2x4c2 AVX (ld64): k = 1..7, n = 1..4, m = 1..2, each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21601
// 2x4c2 AVX (ld64): m = 2, n = 4 with k = 9..15.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21616
// 2x4c2 AVX (ld64): m = 2, n = 4 with k = 9..15, using a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21632
// 2x4c2 AVX (ld64): k = 9..15, n = 1..4, m = 1..2, each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21652
// 2x4c2 AVX (ld64): m = 2, n = 4 with k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21667
// 2x4c2 AVX (ld64): m = 2, n = 4 with k = 16, 24, ..., 80, using a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21683
// 2x4c2 AVX (ld64): k = 16, 24, ..., 80, n = 1..4, m = 1..2, each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21703
// 2x4c2 AVX (ld64): n = 5..7 crossed with k = 1, 10, 19, 28, 37, m fixed at 2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21720
// 2x4c2 AVX (ld64): n = 5..7 crossed with k = 1, 10, 19, 28, 37, using cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21738
// 2x4c2 AVX (ld64): n = 5..7 crossed with k = 1, 10, 19, 28, 37, using a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21756
// 2x4c2 AVX (ld64): n = 5..7, k = 1, 10, 19, 28, 37, m = 1..2, each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21776
// 2x4c2 AVX (ld64): n = 8 and 12 crossed with k = 1, 10, 19, 28, 37, m fixed at 2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21793
// 2x4c2 AVX (ld64): n = 8 and 12 crossed with k = 1, 10, 19, 28, 37, using cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21811
// 2x4c2 AVX (ld64): n = 8 and 12 crossed with k = 1, 10, 19, 28, 37, using a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n).k(k)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21829
// 2x4c2 AVX (ld64): n = 8 and 12, k = 1, 10, 19, 28, 37, m = 1..2, each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21849
// 2x4c2 AVX (ld64): k = 1, 10, 19, 28, 37, n = 1..4, m = 1..2, with cm_stride(7) and
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
21870
// 2x4c2 AVX (ld64): single m = 2, n = 4, k = 8 run with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21884
// 2x4c2 AVX (ld64): single m = 2, n = 4, k = 8 run with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21898
// 2x4c2 AVX (ld64): single m = 2, n = 4, k = 8 run with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21912 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21913
21914
21915 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c2 XOP (ld64): single run with m = 3, n = 4, k = 8 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21928
// 3x4c2 XOP (ld64): single m = 3, n = 4, k = 8 run with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21942
// 3x4c2 XOP (ld64): single m = 3, n = 4, k = 8 run with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
21956
// 3x4c2 XOP (ld64): k = 8 with n = 1..4 crossed with m = 1..3, each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
21974
// 3x4c2 XOP (ld64): n = 4, k = 8 with m = 1..3, each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
21990
// 3x4c2 XOP (ld64): m = 3, k = 8 with n = 1..4, each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
22006
// 3x4c2 XOP (ld64): m = 3, n = 4 with k = 1..7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
22021
// 3x4c2 XOP (ld64): m = 3, n = 4 with k = 1..7, using a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
22037
// 3x4c2 XOP (ld64): k = 1..7, n = 1..4, m = 1..3, each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
22057
// 3x4c2 XOP (ld64): m = 3, n = 4 with k = 9..15.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
22072
// 3x4c2 XOP (ld64): m = 3, n = 4 with k = 9..15, using a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
22088
// 3x4c2 XOP (ld64): k = 9..15, n = 1..4, m = 1..3, each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
22108
// 3x4c2 XOP (ld64): m = 3, n = 4 with k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
22123
// 3x4c2 XOP (ld64): m = 3, n = 4 with k = 16, 24, ..., 80, using a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
22139
// 3x4c2 XOP (ld64): k = 16, 24, ..., 80, n = 1..4, m = 1..3, each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
22159
// 3x4c2 XOP (ld64): n = 5..7 crossed with k = 1, 10, 19, 28, 37, m fixed at 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n).k(k)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
22176
// 3x4c2 XOP (ld64): n = 5..7 crossed with k = 1, 10, 19, 28, 37, using cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
22194
// 3x4c2 XOP (ld64): n = 5..7 crossed with k = 1, 10, 19, 28, 37, using a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n).k(k)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
22212
// 3x4c2 XOP (ld64): n = 5..7, k = 1, 10, 19, 28, 37, m = 1..3, each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
22232
// 3x4c2 XOP (ld64): n = 8 and 12 crossed with k = 1, 10, 19, 28, 37, m fixed at 3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n).k(k)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
22249
// 3x4c2 XOP (ld64): n = 8 and 12 crossed with k = 1, 10, 19, 28, 37, using cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
22267
// 3x4c2 XOP (ld64): n = 8 and 12 crossed with k = 1, 10, 19, 28, 37, using a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n).k(k)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
22285
// 3x4c2 XOP (ld64): n = 8 and 12, k = 1, 10, 19, 28, 37, m = 1..3, each with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
22305
// 3x4c2 XOP (ld64): k = 1, 10, 19, 28, 37, n = 1..4, m = 1..3, with cm_stride(7) and
// iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
22326
// 3x4c2 XOP (ld64): single m = 3, n = 4, k = 8 run with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
22340
// 3x4c2 XOP (ld64): single m = 3, n = 4, k = 8 run with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
22354
// 3x4c2 XOP (ld64): single m = 3, n = 4, k = 8 run with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
22368 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22369
22370
22371 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c2 SSE2 (ld128): single run with m = 1, n = 4, k = 8 and no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
22384
// 1x4c2 SSE2 (ld128): single m = 1, n = 4, k = 8 run with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
22398
// 1x4c2 SSE2 (ld128): single m = 1, n = 4, k = 8 run with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
22412
// Sweeps n over [1, 4] and m over [1, 1] at k=8, with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22430
// Sweeps m over [1, 1] at n=4, k=8, with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22446
// Sweeps n over [1, 4] at m=1, k=8, with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22462
// Sweeps k over [1, 7] at m=1, n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22477
// Sweeps k over [1, 7] at m=1, n=4 with a_stride explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22493
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 1], with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22513
// Sweeps k over [9, 15] at m=1, n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22528
// Sweeps k over [9, 15] at m=1, n=4 with a_stride explicitly set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22544
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 1], with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22564
// Sweeps k over 16, 24, ..., 80 (step 8) at m=1, n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22579
// Sweeps k over 16, 24, ..., 80 (step 8) at m=1, n=4 with a_stride explicitly set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
22595
// Sweeps k over 16, 24, ..., 80 (step 8), n over [1, 4] and m over [1, 1],
// with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22615
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 (step 9) at m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22632
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 (step 9) at m=1,
// with cn_stride explicitly set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22650
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 (step 9) at m=1,
// with a_stride explicitly set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22668
// Sweeps n over [5, 7], k over 1, 10, ..., 37 (step 9) and m over [1, 1],
// with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22688
// Sweeps n over 8 and 12 (step 4) and k over 1, 10, ..., 37 (step 9) at m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22705
// Sweeps n over 8 and 12 (step 4) and k over 1, 10, ..., 37 (step 9) at m=1,
// with cn_stride explicitly set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22723
// Sweeps n over 8 and 12 (step 4) and k over 1, 10, ..., 37 (step 9) at m=1,
// with a_stride explicitly set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22741
// Sweeps n over 8 and 12 (step 4), k over 1, 10, ..., 37 (step 9) and
// m over [1, 1], with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22761
// Sweeps k over 1, 10, ..., 37 (step 9), n over [1, 4] and m over [1, 1],
// with cm_stride explicitly set to 7 and iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22782
// Single run at m=1, n=4, k=8 with qmin explicitly set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
22796
// Single run at m=1, n=4, k=8 with qmax explicitly set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
22810
// Single run at m=1, n=4, k=8 with cm_stride explicitly set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
22824 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22825
22826
22827 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=1, n=4, k=8 with no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
22840
// Single run at m=1, n=4, k=8 with cn_stride explicitly set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
22854
// Single run at m=1, n=4, k=8 with a_stride explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
22868
// Sweeps n over [1, 4] and m over [1, 1] at k=8, with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
22886
// Sweeps m over [1, 1] at n=4, k=8, with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
22902
// Sweeps n over [1, 4] at m=1, k=8, with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
22918
// Sweeps k over [1, 7] at m=1, n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
22933
// Sweeps k over [1, 7] at m=1, n=4 with a_stride explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
22949
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 1], with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
22969
// Sweeps k over [9, 15] at m=1, n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
22984
// Sweeps k over [9, 15] at m=1, n=4 with a_stride explicitly set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23000
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 1], with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23020
// Sweeps k over 16, 24, ..., 80 (step 8) at m=1, n=4.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23035
// Sweeps k over 16, 24, ..., 80 (step 8) at m=1, n=4 with a_stride explicitly set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23051
// Sweeps k over 16, 24, ..., 80 (step 8), n over [1, 4] and m over [1, 1],
// with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23071
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 (step 9) at m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23088
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 (step 9) at m=1,
// with cn_stride explicitly set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23106
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 (step 9) at m=1,
// with a_stride explicitly set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23124
// Sweeps n over [5, 7], k over 1, 10, ..., 37 (step 9) and m over [1, 1],
// with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23144
// Sweeps n over 8 and 12 (step 4) and k over 1, 10, ..., 37 (step 9) at m=1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23161
// Sweeps n over 8 and 12 (step 4) and k over 1, 10, ..., 37 (step 9) at m=1,
// with cn_stride explicitly set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23179
// Sweeps n over 8 and 12 (step 4) and k over 1, 10, ..., 37 (step 9) at m=1,
// with a_stride explicitly set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23197
// Sweeps n over 8 and 12 (step 4), k over 1, 10, ..., 37 (step 9) and
// m over [1, 1], with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23217
// Sweeps k over 1, 10, ..., 37 (step 9), n over [1, 4] and m over [1, 1],
// with cm_stride explicitly set to 7 and iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23238
// Single run at m=1, n=4, k=8 with qmin explicitly set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
23252
// Single run at m=1, n=4, k=8 with qmax explicitly set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
23266
// Single run at m=1, n=4, k=8 with cm_stride explicitly set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
23280 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23281
23282
23283 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=2, n=4, k=8 with no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
23296
// Single run at m=2, n=4, k=8 with cn_stride explicitly set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
23310
// Single run at m=2, n=4, k=8 with a_stride explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
23324
// Sweeps n over [1, 4] and m over [1, 2] at k=8, with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
23342
// Sweeps m over [1, 2] at n=4, k=8, with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23358
// Sweeps n over [1, 4] at m=2, k=8, with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23374
// Sweeps k over [1, 7] at m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23389
// Sweeps k over [1, 7] at m=2, n=4 with a_stride explicitly set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23405
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 2], with iterations(1) per run.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
23425
// Sweeps k over [9, 15] at m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23440
// Sweeps k over [9, 15] at m=2, n=4 with a_stride explicitly set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
23456
// Sweeps k over [9, 16), n over [1, 4] and m over [1, 2], one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23476
// Sweeps k over 16, 24, ..., 80 at m=2, n=4; no strides are overridden.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23491
// Sweeps k over 16, 24, ..., 80 at m=2, n=4 with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
23507
// Sweeps k over 16, 24, ..., 80, n over [1, 4] and m over [1, 2], one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23527
// Sweeps n over [5, 8) and k over 1, 10, 19, 28, 37 at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23544
// Sweeps n over [5, 8) and k over 1, 10, 19, 28, 37 at m=2 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23562
// Sweeps n over [5, 8) and k over 1, 10, 19, 28, 37 at m=2 with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23580
// Sweeps n over [5, 8), k over 1, 10, 19, 28, 37 and m over [1, 2], one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23600
// Sweeps n over 8, 12 and k over 1, 10, 19, 28, 37 at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23617
// Sweeps n over 8, 12 and k over 1, 10, 19, 28, 37 at m=2 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23635
// Sweeps n over 8, 12 and k over 1, 10, 19, 28, 37 at m=2 with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23653
// Sweeps n over 8, 12, k over 1, 10, 19, 28, 37 and m over [1, 2], one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23673
// Sweeps k over 1, 10, 19, 28, 37, n over [1, 4] and m over [1, 2] with cm_stride set to 7,
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23694
// One run with m=2, n=4, k=8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23708
// One run with m=2, n=4, k=8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23722
// One run with m=2, n=4, k=8 and cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
23736 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23737
23738
23739 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with m=3, n=4, k=8; no strides are overridden.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
23752
// One run with m=3, n=4, k=8 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
23766
// One run with m=3, n=4, k=8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
23780
// Sweeps n over [1, 4] and m over [1, 3] at k=8, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
23798
// Sweeps m over [1, 3] at n=4, k=8, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
23814
// Sweeps n over [1, 4] at m=3, k=8, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
23830
// Sweeps k over [1, 8) at m=3, n=4; no strides are overridden.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
23845
// Sweeps k over [1, 8) at m=3, n=4 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
23861
// Sweeps k over [1, 8), n over [1, 4] and m over [1, 3], one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23881
// Sweeps k over [9, 16) at m=3, n=4; no strides are overridden.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
23896
// Sweeps k over [9, 16) at m=3, n=4 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
23912
// Sweeps k over [9, 16), n over [1, 4] and m over [1, 3], one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23932
// Sweeps k over 16, 24, ..., 80 at m=3, n=4; no strides are overridden.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
23947
// Sweeps k over 16, 24, ..., 80 at m=3, n=4 with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
23963
// Sweeps k over 16, 24, ..., 80, n over [1, 4] and m over [1, 3], one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
23983
// Sweeps n over [5, 8) and k over 1, 10, 19, 28, 37 at m=3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24000
// Sweeps n over [5, 8) and k over 1, 10, 19, 28, 37 at m=3 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24018
// Sweeps n over [5, 8) and k over 1, 10, 19, 28, 37 at m=3 with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24036
// Sweeps n over [5, 8), k over 1, 10, 19, 28, 37 and m over [1, 3], one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24056
// Sweeps n over 8, 12 and k over 1, 10, 19, 28, 37 at m=3.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24073
// Sweeps n over 8, 12 and k over 1, 10, 19, 28, 37 at m=3 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24091
// Sweeps n over 8, 12 and k over 1, 10, 19, 28, 37 at m=3 with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(3).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24109
// Sweeps n over 8, 12, k over 1, 10, 19, 28, 37 and m over [1, 3], one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24129
// Sweeps k over 1, 10, 19, 28, 37, n over [1, 4] and m over [1, 3] with cm_stride set to 7,
// one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24150
// One run with m=3, n=4, k=8 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
24164
// One run with m=3, n=4, k=8 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
24178
// One run with m=3, n=4, k=8 and cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
24192 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24193
24194
24195 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with m=2, n=4, k=8; no strides are overridden.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24208
// One run with m=2, n=4, k=8 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24222
// One run with m=2, n=4, k=8 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
24236
// Sweeps n over [1, 4] and m over [1, 2] at k=8, one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24254
// Sweeps m over [1, 2] at n=4, k=8, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24270
// Sweeps n over [1, 4] at m=2, k=8, one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_dim).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24286
// Sweeps k over [1, 8) at m=2, n=4; no strides are overridden.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24301
// Sweeps k over [1, 8) at m=2, n=4 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24317
// Sweeps k over [1, 8), n over [1, 4] and m over [1, 2], one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24337
// Sweeps k over [9, 16) at m=2, n=4; no strides are overridden.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24352
// Sweeps k over [9, 16) at m=2, n=4 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24368
// Sweeps k over [9, 16), n over [1, 4] and m over [1, 2], one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24388
// Sweeps k over 16, 24, ..., 80 at m=2, n=4; no strides are overridden.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24403
// Sweeps k over 16, 24, ..., 80 at m=2, n=4 with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
24419
// Sweeps k over 16, 24, ..., 80, n over [1, 4] and m over [1, 2], one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24439
// Sweeps n over [5, 8) and k over 1, 10, 19, 28, 37 at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24456
// Sweeps n over [5, 8) and k over 1, 10, 19, 28, 37 at m=2 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24474
// Sweeps n over [5, 8) and k over 1, 10, 19, 28, 37 at m=2 with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
24492
// Sweeps n over [5, 8), k over 1, 10, 19, 28, 37 and m over [1, 2], one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
24512
// 2x4c2 XOP ld128 kernel: n = 8 and 12, k = 1, 10, 19, 28, 37, with m fixed at 2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24529
// 2x4c2 XOP ld128 kernel: n = 8 and 12, k = 1, 10, 19, 28, 37, m = 2, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24547
// 2x4c2 XOP ld128 kernel: n = 8 and 12, k = 1, 10, 19, 28, 37, m = 2, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24565
// 2x4c2 XOP ld128 kernel: n = 8 and 12, k = 1, 10, 19, 28, 37, m = 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24585
// 2x4c2 XOP ld128 kernel: k = 1, 10, 19, 28, 37, n = 1..4, m = 1..2, cm_stride set to 7,
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24606
// 2x4c2 XOP ld128 kernel: single m = 2, n = 4, k = 8 case with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24620
// 2x4c2 XOP ld128 kernel: single m = 2, n = 4, k = 8 case with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24634
// 2x4c2 XOP ld128 kernel: single m = 2, n = 4, k = 8 case with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24648 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24649
24650
24651 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c2s4 SSE2 ld64 kernel: single m = 1, n = 4, k = 8 case.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
24664
// 1x4c2s4 SSE2 ld64 kernel: single m = 1, n = 4, k = 8 case with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
24678
// 1x4c2s4 SSE2 ld64 kernel: single m = 1, n = 4, k = 8 case with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
24692
// 1x4c2s4 SSE2 ld64 kernel: k = 8 with n = 1..4 and m = 1 (the m loop has a single value),
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24710
// 1x4c2s4 SSE2 ld64 kernel: n = 4, k = 8 while m loops over its single value 1,
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
24726
// 1x4c2s4 SSE2 ld64 kernel: m = 1, k = 8 while n runs over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
24742
// 1x4c2s4 SSE2 ld64 kernel: m = 1, n = 4 while k runs over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
24757
// 1x4c2s4 SSE2 ld64 kernel: m = 1, n = 4, k = 1..7, with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
24773
// 1x4c2s4 SSE2 ld64 kernel: k = 1..7, n = 1..4, m = 1 (single-value loop), one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24793
// 1x4c2s4 SSE2 ld64 kernel: m = 1, n = 4 while k runs over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
24808
// 1x4c2s4 SSE2 ld64 kernel: m = 1, n = 4, k = 9..15, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
24824
// 1x4c2s4 SSE2 ld64 kernel: k = 9..15, n = 1..4, m = 1 (single-value loop), one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24844
// 1x4c2s4 SSE2 ld64 kernel: m = 1, n = 4 while k steps through 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
24859
// 1x4c2s4 SSE2 ld64 kernel: m = 1, n = 4, k = 16, 24, ..., 80, with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
24875
// 1x4c2s4 SSE2 ld64 kernel: k = 16, 24, ..., 80, n = 1..4, m = 1 (single-value loop),
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24895
// 1x4c2s4 SSE2 ld64 kernel: n = 5..7, k = 1, 10, 19, 28, 37, with m fixed at 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24912
// 1x4c2s4 SSE2 ld64 kernel: n = 5..7, k = 1, 10, 19, 28, 37, m = 1, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24930
// 1x4c2s4 SSE2 ld64 kernel: n = 5..7, k = 1, 10, 19, 28, 37, m = 1, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24948
// 1x4c2s4 SSE2 ld64 kernel: n = 5..7, k = 1, 10, 19, 28, 37, m = 1 (single-value loop),
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24968
// 1x4c2s4 SSE2 ld64 kernel: n = 8 and 12, k = 1, 10, 19, 28, 37, with m fixed at 1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24985
// 1x4c2s4 SSE2 ld64 kernel: n = 8 and 12, k = 1, 10, 19, 28, 37, m = 1, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25003
// 1x4c2s4 SSE2 ld64 kernel: n = 8 and 12, k = 1, 10, 19, 28, 37, m = 1, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25021
// 1x4c2s4 SSE2 ld64 kernel: n = 8 and 12, k = 1, 10, 19, 28, 37, m = 1 (single-value loop),
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25041
// 1x4c2s4 SSE2 ld64 kernel: k = 1, 10, 19, 28, 37, n = 1..4, m = 1 (single-value loop),
// cm_stride set to 7, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25062
// 1x4c2s4 SSE2 ld64 kernel: single m = 1, n = 4, k = 8 case with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
25076
// 1x4c2s4 SSE2 ld64 kernel: single m = 1, n = 4, k = 8 case with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
25090
// 1x4c2s4 SSE2 ld64 kernel: single m = 1, n = 4, k = 8 case with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
25104 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25105
25106
25107 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c2s4 SSE2 ld64 kernel: single m = 2, n = 4, k = 8 case.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
25120
// 2x4c2s4 SSE2 ld64 kernel: single m = 2, n = 4, k = 8 case with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
25134
// 2x4c2s4 SSE2 ld64 kernel: single m = 2, n = 4, k = 8 case with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
25148
// 2x4c2s4 SSE2 ld64 kernel: k = 8 with n = 1..4 and m = 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25166
// 2x4c2s4 SSE2 ld64 kernel: n = 4, k = 8 while m runs over 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
25182
// 2x4c2s4 SSE2 ld64 kernel: m = 2, k = 8 while n runs over 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
25198
// 2x4c2s4 SSE2 ld64 kernel: m = 2, n = 4 while k runs over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
25213
// 2x4c2s4 SSE2 ld64 kernel: m = 2, n = 4, k = 1..7, with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
25229
// 2x4c2s4 SSE2 ld64 kernel: k = 1..7, n = 1..4, m = 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25249
// 2x4c2s4 SSE2 ld64 kernel: m = 2, n = 4 while k runs over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
25264
// 2x4c2s4 SSE2 ld64 kernel: m = 2, n = 4, k = 9..15, with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
25280
// 2x4c2s4 SSE2 ld64 kernel: k = 9..15, n = 1..4, m = 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25300
// 2x4c2s4 SSE2 ld64 kernel: m = 2, n = 4 while k steps through 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
25315
// 2x4c2s4 SSE2 ld64 kernel: m = 2, n = 4, k = 16, 24, ..., 80, with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
25331
// 2x4c2s4 SSE2 ld64 kernel: k = 16, 24, ..., 80, n = 1..4, m = 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25351
// 2x4c2s4 SSE2 ld64 kernel: n = 5..7, k = 1, 10, 19, 28, 37, with m fixed at 2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25368
// 2x4c2s4 SSE2 ld64 kernel: n = 5..7, k = 1, 10, 19, 28, 37, m = 2, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25386
// 2x4c2s4 SSE2 ld64 kernel: n = 5..7, k = 1, 10, 19, 28, 37, m = 2, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25404
// 2x4c2s4 SSE2 ld64 kernel: n = 5..7, k = 1, 10, 19, 28, 37, m = 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25424
// 2x4c2s4 SSE2 ld64 kernel: n = 8 and 12, k = 1, 10, 19, 28, 37, with m fixed at 2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25441
// 2x4c2s4 SSE2 ld64 kernel: n = 8 and 12, k = 1, 10, 19, 28, 37, m = 2, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25459
// 2x4c2s4 SSE2 ld64 kernel: n = 8 and 12, k = 1, 10, 19, 28, 37, m = 2, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25477
// 2x4c2s4 SSE2 ld64 kernel: n = 8 and 12, k = 1, 10, 19, 28, 37, m = 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25497
// 2x4c2s4 SSE2 ld64 kernel: k = 1, 10, 19, 28, 37, n = 1..4, m = 1..2, cm_stride set to 7,
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25518
// 2x4c2s4 SSE2 ld64 kernel: single m = 2, n = 4, k = 8 case with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
25532
// 2x4c2s4 SSE2 ld64 kernel: single m = 2, n = 4, k = 8 case with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
25546
// 2x4c2s4 SSE2 ld64 kernel: single m = 2, n = 4, k = 8 case with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
25560 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25561
25562
25563 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 4x4 tile at k = 8.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
25576
// Full 4x4 tile at k = 8 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
25590
// Full 4x4 tile at k = 8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
25604
// Every m in [1, 4] x n in [1, 4] sub-tile at k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25622
// Varies m over [1, 4] with n = 4 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25638
// Varies n over [1, 4] with m = 4 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25654
// Full 4x4 tile for every k in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25669
// Full 4x4 tile for every k in [1, 7] with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25685
// Every m in [1, 4] x n in [1, 4] sub-tile for each k in [1, 7],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25705
// Full 4x4 tile for every k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25720
// Full 4x4 tile for every k in [9, 15] with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25736
// Every m in [1, 4] x n in [1, 4] sub-tile for each k in [9, 15],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25756
// Full 4x4 tile for k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25771
// Full 4x4 tile for k = 16, 24, ..., 80 with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(kc)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25787
// Every m in [1, 4] x n in [1, 4] sub-tile for k = 16, 24, ..., 80,
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25807
// m = 4 with n in [5, 7], sweeping k = 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25824
// m = 4 with n in [5, 7], sweeping k = 1, 10, ..., 37, with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25842
// m = 4 with n in [5, 7], sweeping k = 1, 10, ..., 37, with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25860
// n in [5, 7], k = 1, 10, ..., 37 and every m in [1, 4],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25880
// m = 4 with n = 8 and 12, sweeping k = 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25897
// m = 4 with n = 8 and 12, sweeping k = 1, 10, ..., 37, with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25915
// m = 4 with n = 8 and 12, sweeping k = 1, 10, ..., 37, with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(nc).k(kc)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25933
// n = 8 and 12, k = 1, 10, ..., 37 and every m in [1, 4],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25953
// Sweeps k = 1, 10, ..., 37 and every m in [1, 4] x n in [1, 4] sub-tile
// with cm_stride(7), one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25974
// Full 4x4 tile at k = 8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
25988
// Full 4x4 tile at k = 8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26002
// Full 4x4 tile at k = 8 with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26016 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26017
26018
26019 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 2x4 tile at k = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26032
// Full 2x4 tile at k = 8 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26046
// Full 2x4 tile at k = 8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26060
// Every m in [1, 2] x n in [1, 4] sub-tile at k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26078
// Varies m over [1, 2] with n = 4 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26094
// Varies n over [1, 4] with m = 2 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26110
// Full 2x4 tile for every k in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26125
// Full 2x4 tile for every k in [1, 7] with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26141
// Every m in [1, 2] x n in [1, 4] sub-tile for each k in [1, 7],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26161
// Full 2x4 tile for every k in [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26176
// Full 2x4 tile for every k in [9, 15] with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26192
// Every m in [1, 2] x n in [1, 4] sub-tile for each k in [9, 15],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26212
// Full 2x4 tile for k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26227
// Full 2x4 tile for k = 16, 24, ..., 80 with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26243
// Every m in [1, 2] x n in [1, 4] sub-tile for k = 16, 24, ..., 80,
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26263
// m = 2 with n in [5, 7], sweeping k = 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26280
// m = 2 with n in [5, 7], sweeping k = 1, 10, ..., 37, with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26298
// m = 2 with n in [5, 7], sweeping k = 1, 10, ..., 37, with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26316
// n in [5, 7], k = 1, 10, ..., 37 and every m in [1, 2],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26336
// m = 2 with n = 8 and 12, sweeping k = 1, 10, ..., 37.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26353
// m = 2 with n = 8 and 12, sweeping k = 1, 10, ..., 37, with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26371
// m = 2 with n = 8 and 12, sweeping k = 1, 10, ..., 37, with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26389
// n = 8 and 12, k = 1, 10, ..., 37 and every m in [1, 2],
// one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26409
// Sweeps k = 1, 10, ..., 37 and every m in [1, 2] x n in [1, 4] sub-tile
// with cm_stride(7), one iteration per configuration.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26430
// Full 2x4 tile at k = 8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26444
// Full 2x4 tile at k = 8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26458
// Full 2x4 tile at k = 8 with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26472 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26473
26474
26475 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile at k = 8.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26488
// Full 3x4 tile at k = 8 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26502
// Full 3x4 tile at k = 8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26516
// Every m in [1, 3] x n in [1, 4] sub-tile at k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26534
// Varies m over [1, 3] with n = 4 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26550
// Varies n over [1, 4] with m = 3 and k = 8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26566
// Full 3x4 tile for every k in [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26581
// Full 3x4 tile for every k in [1, 7] with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26597
// Every (k, n, m) combination with k in [1, 7], n in [1, 4], m in [1, 3].
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26617
// Full 3x4 tile with every k from 9 through 15.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
26632
// Full 3x4 tile with every k from 9 through 15 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
26648
// Every (k, n, m) combination with k in [9, 15], n in [1, 4], m in [1, 3].
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26668
// Full 3x4 tile with k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
26683
// Full 3x4 tile with k = 16, 24, ..., 80 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
26699
// Every (k, n, m) combination with k = 16, 24, ..., 80, n in [1, 4], m in [1, 3].
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26719
// m == 3 with n in [5, 7] and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26736
// m == 3 with n in [5, 7], k = 1, 10, 19, 28, 37, and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26754
// m == 3 with n in [5, 7], k = 1, 10, 19, 28, 37, and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26772
// Every (n, k, m) combination with n in [5, 7], k = 1, 10, ..., 37, m in [1, 3].
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26792
// m == 3 with n = 8, 12 (multiples of 4) and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26809
// m == 3 with n = 8, 12, k = 1, 10, 19, 28, 37, and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26827
// m == 3 with n = 8, 12, k = 1, 10, 19, 28, 37, and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26845
// Every (n, k, m) combination with n = 8, 12, k = 1, 10, ..., 37, m in [1, 3].
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26865
// Every (k, n, m) combination with k = 1, 10, ..., 37, n in [1, 4], m in [1, 3],
// each run with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26886
// Full 3x4 tile, k == 8, with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
26900
// Full 3x4 tile, k == 8, with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
26914
// Full 3x4 tile, k == 8, with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
26928 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26929
26930
26931 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 1x4 tile with k == 8.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
26944
// Full 1x4 tile, k == 8, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
26958
// Full 1x4 tile, k == 8, with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
26972
// k == 8 with every (n, m) pair for n in [1, 4]; the m loop runs once (m == 1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26990
// n == 4 and k == 8; the m loop runs once because its bounds are 1..1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
27006
// Holds m == 1 and k == 8 while n takes each value from 1 through 4 (nr).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
27022
// Full 1x4 tile with every k from 1 through 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
27037
// Full 1x4 tile with every k from 1 through 7 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
27053
// Every (k, n, m) combination with k in [1, 7], n in [1, 4]; the m loop runs
// once (m == 1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27073
// Full 1x4 tile with every k from 9 through 15.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
27088
// Full 1x4 tile with every k from 9 through 15 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
27104
// Every (k, n, m) combination with k in [9, 15], n in [1, 4]; the m loop runs
// once (m == 1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27124
// Full 1x4 tile with k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
27139
// Full 1x4 tile with k = 16, 24, ..., 80 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
27155
// Every (k, n, m) combination with k = 16, 24, ..., 80, n in [1, 4]; the m loop
// runs once (m == 1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27175
// m == 1 with n in [5, 7] and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27192
// m == 1 with n in [5, 7], k = 1, 10, 19, 28, 37, and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27210
// m == 1 with n in [5, 7], k = 1, 10, 19, 28, 37, and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27228
// Every (n, k, m) combination with n in [5, 7], k = 1, 10, ..., 37; the m loop
// runs once (m == 1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27248
// m == 1 with n = 8, 12 (multiples of 4) and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27265
// m == 1 with n = 8, 12, k = 1, 10, 19, 28, 37, and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27283
// m == 1 with n = 8, 12, k = 1, 10, 19, 28, 37, and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .a_stride(43)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27301
// Every (n, k, m) combination with n = 8, 12, k = 1, 10, ..., 37; the m loop
// runs once (m == 1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27321
// Every (k, n, m) combination with k = 1, 10, ..., 37, n in [1, 4], and a
// single-pass m loop (m == 1), each run with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27342
// Full 1x4 tile, k == 8, with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
27356
// Full 1x4 tile, k == 8, with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
27370
// Full 1x4 tile, k == 8, with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
27384 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27385
27386
27387 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 1x4 tile with k == 8.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27400
// Full 1x4 tile, k == 8, with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27414
// Full 1x4 tile, k == 8, with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .a_stride(11)
      .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
27428
// k == 8 with every (n, m) pair for n in [1, 4]; the m loop runs once (m == 1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27446
// n == 4 and k == 8; the m loop runs once because its bounds are 1..1.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27462
// Holds m == 1 and k == 8 while n takes each value from 1 through 4 (nr).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27478
// Full 1x4 tile with every k from 1 through 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27493
// Full 1x4 tile with every k from 1 through 7 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .a_stride(11)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27509
// Every (k, n, m) combination with k in [1, 7], n in [1, 4]; the m loop runs
// once (m == 1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27529
// Full 1x4 tile with every k from 9 through 15.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27544
// Full 1x4 tile with every k from 9 through 15 and a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .a_stride(19)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27560
// Every (k, n, m) combination with k in [9, 15], n in [1, 4]; the m loop runs
// once (m == 1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27580
// Full 1x4 tile with k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27595
// Full 1x4 tile with k = 16, 24, ..., 80 and a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .a_stride(83)
        .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
27611
// Every (k, n, m) combination with k = 16, 24, ..., 80, n in [1, 4]; the m loop
// runs once (m == 1).
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
27631
// m == 1 with n in [5, 7] and k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
27648
// n = 5, 6, 7 and k = 1, 10, 19, 28, 37 with m=1 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27666
// n = 5, 6, 7 and k = 1, 10, 19, 28, 37 with m=1 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27684
// n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37 and m in 1..1, using
// iterations(1) for each case.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27704
// Runs with m=1 for n = 8 and 12 (multiples of the configured nr of 4) and
// k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27721
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37, m=1, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27739
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37, m=1, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27757
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37 and m in 1..1, using
// iterations(1) for each case.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27777
// k = 1, 10, 19, 28, 37 crossed with n in 1..4 and m in 1..1, with
// cm_stride set to 7 and iterations(1) for each case.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27798
// Single m=1, n=4, k=8 run with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
27812
// Single m=1, n=4, k=8 run with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
27826
// Single m=1, n=4, k=8 run with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
27840 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27841
27842
27843 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 2x4c2s4 sse2_ld128 microkernel with m=2, n=4, k=8.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
27856
// Single m=2, n=4, k=8 run with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
27870
// Single m=2, n=4, k=8 run with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
27884
// With k fixed at 8, runs every (m, n) combination for n in 1..4 and
// m in 1..2, using iterations(1) for each case.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27902
// With n=4 and k=8 fixed, varies m over 1..2, using iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
27918
// With m=2 and k=8 fixed, varies n over 1..4, using iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
27934
// Runs m=2, n=4 for each k in 1..7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
27949
// Runs m=2, n=4 for each k in 1..7 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_size);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
27965
// For each k in 1..7, runs every (m, n) combination with n in 1..4 and
// m in 1..2, using iterations(1) for each case.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27985
// Runs m=2, n=4 for each k in 9..15.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28000
// Runs m=2, n=4 for each k in 9..15 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_size);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28016
// For each k in 9..15, runs every (m, n) combination with n in 1..4 and
// m in 1..2, using iterations(1) for each case.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28036
// Runs m=2, n=4 while k takes the values 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28051
// Runs m=2, n=4 for k = 16, 24, ..., 80 with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(k_size);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28067
// For k = 16, 24, ..., 80, runs every (m, n) combination with n in 1..4 and
// m in 1..2, using iterations(1) for each case.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28087
// Runs with m=2 for n = 5, 6, 7 (above the configured nr of 4) and
// k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28104
// n = 5, 6, 7 and k = 1, 10, 19, 28, 37 with m=2 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28122
// n = 5, 6, 7 and k = 1, 10, 19, 28, 37 with m=2 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28140
// n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37 and m in 1..2, using
// iterations(1) for each case.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28160
// Runs with m=2 for n = 8 and 12 (multiples of the configured nr of 4) and
// k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28177
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37, m=2, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28195
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37, m=2, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28213
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37 and m in 1..2, using
// iterations(1) for each case.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28233
// k = 1, 10, 19, 28, 37 crossed with n in 1..4 and m in 1..2, with
// cm_stride set to 7 and iterations(1) for each case.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28254
// Single m=2, n=4, k=8 run with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
28268
// Single m=2, n=4, k=8 run with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
28282
// Single m=2, n=4, k=8 run with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
28296 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28297
28298
28299 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 3x4c2s4 sse2_ld128 microkernel with m=3, n=4, k=8.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
28312
// Single m=3, n=4, k=8 run with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
28326
// Single m=3, n=4, k=8 run with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
28340
// With k fixed at 8, runs every (m, n) combination for n in 1..4 and
// m in 1..3, using iterations(1) for each case.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28358
// With n=4 and k=8 fixed, varies m over 1..3, using iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28374
// With m=3 and k=8 fixed, varies n over 1..4, using iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28390
// Runs m=3, n=4 for each k in 1..7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28405
// Runs m=3, n=4 for each k in 1..7 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28421
// For each k in 1..7, runs every (m, n) combination with n in 1..4 and
// m in 1..3, using iterations(1) for each case.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28441
// Runs m=3, n=4 for each k in 9..15.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28456
// Runs m=3, n=4 for each k in 9..15 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28472
// For each k in 9..15, runs every (m, n) combination with n in 1..4 and
// m in 1..3, using iterations(1) for each case.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28492
// Runs m=3, n=4 while k takes the values 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28507
// Runs m=3, n=4 for k = 16, 24, ..., 80 with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
28523
// For k = 16, 24, ..., 80, runs every (m, n) combination with n in 1..4 and
// m in 1..3, using iterations(1) for each case.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28543
// Runs with m=3 for n = 5, 6, 7 (above the configured nr of 4) and
// k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28560
// n = 5, 6, 7 and k = 1, 10, 19, 28, 37 with m=3 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28578
// n = 5, 6, 7 and k = 1, 10, 19, 28, 37 with m=3 and a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28596
// n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37 and m in 1..3, using
// iterations(1) for each case.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28616
// Runs with m=3 for n = 8 and 12 (multiples of the configured nr of 4) and
// k = 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28633
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37, m=3, cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28651
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37, m=3, a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28669
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37 and m in 1..3, using
// iterations(1) for each case.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
28689
// For k in 1, 10, 19, 28, 37, runs every (n, m) with n in 1..4 and m in 1..3,
// using cm_stride = 7 and a single iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28710
// Single run with m = 3, n = 4, k = 8 and the tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
28724
// Single run with m = 3, n = 4, k = 8 and the tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
28738
// Single run with m = 3, n = 4, k = 8 and cm_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
28752 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28753
28754
28755 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 4, n = 4, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
28768
// Single run with m = 4, n = 4, k = 8 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
28782
// Single run with m = 4, n = 4, k = 8 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
28796
// With k = 8, runs every (n, m) with n in 1..4 and m in 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28814
// With n = 4 and k = 8, runs every m in 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
28830
// With m = 4 and k = 8, runs every n in 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
28846
// With m = 4 and n = 4, sweeps k over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
28861
// With m = 4, n = 4 and a_stride = 11, sweeps k over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
28877
// For k in 1..7, runs every (n, m) with n in 1..4 and m in 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28897
// With m = 4 and n = 4, sweeps k over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
28912
// With m = 4, n = 4 and a_stride = 19, sweeps k over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
28928
// For k in 9..15, runs every (n, m) with n in 1..4 and m in 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28948
// With m = 4 and n = 4, sweeps k over the multiples of 8 from 16 to 80.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
28963
// With m = 4, n = 4 and a_stride = 83, sweeps k over the multiples of 8 from 16 to 80.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(depth)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
28979
// For k in 16, 24, ..., 80, runs every (n, m) with n in 1..4 and m in 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28999
// Runs with m = 4 for n = 5, 6, 7, sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29016
// Runs with m = 4 and cn_stride = 7 for n = 5, 6, 7, sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29034
// Runs with m = 4 and a_stride = 43 for n = 5, 6, 7, sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29052
// For n = 5, 6, 7 and k in 1, 10, 19, 28, 37, runs every m in 1..4 with a single iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
29072
// Runs with m = 4 for n = 8 and 12, sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29089
// Runs with m = 4 and cn_stride = 7 for n = 8 and 12, sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29107
// Runs with m = 4 and a_stride = 43 for n = 8 and 12, sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29125
// For n = 8 and 12 and k in 1, 10, 19, 28, 37, runs every m in 1..4 with a single iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
29145
// For k in 1, 10, 19, 28, 37, runs every (n, m) with n in 1..4 and m in 1..4,
// using cm_stride = 7 and a single iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
29166
// Single run with m = 4, n = 4, k = 8 and the tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
29180
// Single run with m = 4, n = 4, k = 8 and the tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
29194
// Single run with m = 4, n = 4, k = 8 and cm_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
29208 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29209
29210
29211 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 3, n = 4, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
29224
// Single run with m = 3, n = 4, k = 8 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
29238
// Single run with m = 3, n = 4, k = 8 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
29252
// With k = 8, runs every (n, m) with n in 1..4 and m in 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29270
// With n = 4 and k = 8, runs every m in 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
29286
// With m = 3 and k = 8, runs every n in 1..4, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(cols).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
29302
// With m = 3 and n = 4, sweeps k over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
29317
// With m = 3, n = 4 and a_stride = 11, sweeps k over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .a_stride(11)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
29333
// For k in 1..7, runs every (n, m) with n in 1..4 and m in 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
29353
// With m = 3 and n = 4, sweeps k over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
29368
// With m = 3, n = 4 and a_stride = 19, sweeps k over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .a_stride(19)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
29384
// For k in 9..15, runs every (n, m) with n in 1..4 and m in 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
29404
// With m = 3 and n = 4, sweeps k over the multiples of 8 from 16 to 80.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
29419
// With m = 3, n = 4 and a_stride = 83, sweeps k over the multiples of 8 from 16 to 80.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(depth)
        .a_stride(83)
        .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
29435
// For k in 16, 24, ..., 80, runs every (n, m) with n in 1..4 and m in 1..3, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
29455
// Runs with m = 3 for n = 5, 6, 7, sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29472
// Runs with m = 3 and cn_stride = 7 for n = 5, 6, 7, sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29490
// Runs with m = 3 and a_stride = 43 for n = 5, 6, 7, sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29508
// For n = 5, 6, 7 and k in 1, 10, 19, 28, 37, runs every m in 1..3 with a single iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
29528
// Runs with m = 3 for n = 8 and 12, sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29545
// Runs with m = 3 and cn_stride = 7 for n = 8 and 12, sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .cn_stride(7)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29563
// Runs with m = 3 and a_stride = 43 for n = 8 and 12, sweeping k over 1, 10, 19, 28, 37.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(cols).k(depth)
          .a_stride(43)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29581
// For n = 8 and 12 and k in 1, 10, 19, 28, 37, runs every m in 1..3 with a single iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
29601
// For k in 1, 10, 19, 28, 37, runs every (n, m) with n in 1..4 and m in 1..3,
// using cm_stride = 7 and a single iteration each.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
29622
// Single run with m = 3, n = 4, k = 8 and the tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
29636
// Single run with m = 3, n = 4, k = 8 and the tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
29650
// Single run with m = 3, n = 4, k = 8 and cm_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
29664 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29665
29666
29667 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = 2, n = 4, k = 8.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
29680
// Single run with m = 2, n = 4, k = 8 and cn_stride = 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
29694
// Single run with m = 2, n = 4, k = 8 and a_stride = 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .a_stride(11)
      .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
29708
// With k = 8, runs every (n, m) with n in 1..4 and m in 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
29726
// n=4, k=8; sweeps m over [1, 2] with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t dim_m = 1; dim_m <= 2; ++dim_m) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(dim_m).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29742
// m=2, k=8; sweeps n over [1, 4] with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t dim_n = 1; dim_n <= 4; ++dim_n) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(dim_n).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29758
// m=2, n=4; sweeps k over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t dim_k = 1; dim_k < 8; ++dim_k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(dim_k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29773
// m=2, n=4, a_stride=11; sweeps k over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t dim_k = 1; dim_k < 8; ++dim_k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(dim_k).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29789
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 2] (nested in that
// order), with iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t dim_k = 1; dim_k < 8; ++dim_k) {
    for (uint32_t dim_n = 1; dim_n <= 4; ++dim_n) {
      for (uint32_t dim_m = 1; dim_m <= 2; ++dim_m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(dim_m).n(dim_n).k(dim_k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29809
// m=2, n=4; sweeps k over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t dim_k = 9; dim_k < 16; ++dim_k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(dim_k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29824
// m=2, n=4, a_stride=19; sweeps k over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t dim_k = 9; dim_k < 16; ++dim_k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(dim_k).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29840
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 2] (nested in that
// order), with iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t dim_k = 9; dim_k < 16; ++dim_k) {
    for (uint32_t dim_n = 1; dim_n <= 4; ++dim_n) {
      for (uint32_t dim_m = 1; dim_m <= 2; ++dim_m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(dim_m).n(dim_n).k(dim_k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29860
// m=2, n=4; sweeps k over 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t dim_k = 16; dim_k <= 80; dim_k += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(dim_k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29875
// m=2, n=4, a_stride=83; sweeps k over 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t dim_k = 16; dim_k <= 80; dim_k += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(dim_k).a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
29891
// Sweeps k over 16, 24, ..., 80, n over [1, 4] and m over [1, 2] (nested in
// that order), with iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t dim_k = 16; dim_k <= 80; dim_k += 8) {
    for (uint32_t dim_n = 1; dim_n <= 4; ++dim_n) {
      for (uint32_t dim_m = 1; dim_m <= 2; ++dim_m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(dim_m).n(dim_n).k(dim_k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29911
// m=2; sweeps n over [5, 7] (outer) and k over 1, 10, ..., 37 (inner).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t dim_n = 5; dim_n < 8; ++dim_n) {
    for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(dim_n).k(dim_k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29928
// m=2, cn_stride=7; sweeps n over [5, 7] (outer) and k over 1, 10, ..., 37
// (inner).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t dim_n = 5; dim_n < 8; ++dim_n) {
    for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(dim_n).k(dim_k).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29946
// m=2, a_stride=43; sweeps n over [5, 7] (outer) and k over 1, 10, ..., 37
// (inner).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t dim_n = 5; dim_n < 8; ++dim_n) {
    for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(dim_n).k(dim_k).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29964
// Sweeps n over [5, 7], k over 1, 10, ..., 37 and m over [1, 2] (nested in
// that order), with iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t dim_n = 5; dim_n < 8; ++dim_n) {
    for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
      for (uint32_t dim_m = 1; dim_m <= 2; ++dim_m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(dim_m).n(dim_n).k(dim_k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29984
// m=2; sweeps n over 8 and 12 (outer) and k over 1, 10, ..., 37 (inner).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t dim_n = 8; dim_n <= 12; dim_n += 4) {
    for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(dim_n).k(dim_k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30001
// m=2, cn_stride=7; sweeps n over 8 and 12 (outer) and k over 1, 10, ..., 37
// (inner).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t dim_n = 8; dim_n <= 12; dim_n += 4) {
    for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(dim_n).k(dim_k).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30019
// m=2, a_stride=43; sweeps n over 8 and 12 (outer) and k over 1, 10, ..., 37
// (inner).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t dim_n = 8; dim_n <= 12; dim_n += 4) {
    for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(dim_n).k(dim_k).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30037
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over [1, 2] (nested in
// that order), with iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t dim_n = 8; dim_n <= 12; dim_n += 4) {
    for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
      for (uint32_t dim_m = 1; dim_m <= 2; ++dim_m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(dim_m).n(dim_n).k(dim_k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30057
// cm_stride=7; sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 2]
// (nested in that order), with iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
    for (uint32_t dim_n = 1; dim_n <= 4; ++dim_n) {
      for (uint32_t dim_m = 1; dim_m <= 2; ++dim_m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(dim_m).n(dim_n).k(dim_k).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30078
// m=2, n=4, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);   // micro-kernel tile parameters
  tester.m(2).n(4).k(8).qmin(128);  // problem size and qmin setting
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
30092
// m=2, n=4, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);   // micro-kernel tile parameters
  tester.m(2).n(4).k(8).qmax(128);  // problem size and qmax setting
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
30106
// m=2, n=4, k=8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);       // micro-kernel tile parameters
  tester.m(2).n(4).k(8).cm_stride(7);   // problem size and cm stride
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
30120 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30121
30122
30123 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// m=2, n=4 with k fixed at 8.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);  // micro-kernel tile parameters
  tester.m(2).n(4).k(8);           // problem size
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30136
// m=2, n=4, k=8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);       // micro-kernel tile parameters
  tester.m(2).n(4).k(8).cn_stride(7);   // problem size and cn stride
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30150
// m=2, n=4, k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);       // micro-kernel tile parameters
  tester.m(2).n(4).k(8).a_stride(11);   // problem size and A stride
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30164
// k=8; sweeps n over [1, 4] (outer) and m over [1, 2] (inner), with
// iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t dim_n = 1; dim_n <= 4; ++dim_n) {
    for (uint32_t dim_m = 1; dim_m <= 2; ++dim_m) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(dim_m).n(dim_n).k(8).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30182
// n=4, k=8; sweeps m over [1, 2] with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t dim_m = 1; dim_m <= 2; ++dim_m) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(dim_m).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30198
// m=2, k=8; sweeps n over [1, 4] with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t dim_n = 1; dim_n <= 4; ++dim_n) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(dim_n).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30214
// m=2, n=4; sweeps k over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t dim_k = 1; dim_k < 8; ++dim_k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(dim_k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30229
// m=2, n=4, a_stride=11; sweeps k over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t dim_k = 1; dim_k < 8; ++dim_k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(dim_k).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30245
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 2] (nested in that
// order), with iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t dim_k = 1; dim_k < 8; ++dim_k) {
    for (uint32_t dim_n = 1; dim_n <= 4; ++dim_n) {
      for (uint32_t dim_m = 1; dim_m <= 2; ++dim_m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(dim_m).n(dim_n).k(dim_k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30265
// m=2, n=4; sweeps k over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t dim_k = 9; dim_k < 16; ++dim_k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(dim_k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30280
// m=2, n=4, a_stride=19; sweeps k over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t dim_k = 9; dim_k < 16; ++dim_k) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(dim_k).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30296
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 2] (nested in that
// order), with iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t dim_k = 9; dim_k < 16; ++dim_k) {
    for (uint32_t dim_n = 1; dim_n <= 4; ++dim_n) {
      for (uint32_t dim_m = 1; dim_m <= 2; ++dim_m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(dim_m).n(dim_n).k(dim_k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30316
// m=2, n=4; sweeps k over 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t dim_k = 16; dim_k <= 80; dim_k += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(dim_k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30331
// m=2, n=4, a_stride=83; sweeps k over 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t dim_k = 16; dim_k <= 80; dim_k += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(dim_k).a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30347
// Sweeps k over 16, 24, ..., 80, n over [1, 4] and m over [1, 2] (nested in
// that order), with iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t dim_k = 16; dim_k <= 80; dim_k += 8) {
    for (uint32_t dim_n = 1; dim_n <= 4; ++dim_n) {
      for (uint32_t dim_m = 1; dim_m <= 2; ++dim_m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(dim_m).n(dim_n).k(dim_k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30367
// m=2; sweeps n over [5, 7] (outer) and k over 1, 10, ..., 37 (inner).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t dim_n = 5; dim_n < 8; ++dim_n) {
    for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(dim_n).k(dim_k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30384
// m=2, cn_stride=7; sweeps n over [5, 7] (outer) and k over 1, 10, ..., 37
// (inner).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t dim_n = 5; dim_n < 8; ++dim_n) {
    for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(dim_n).k(dim_k).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30402
// m=2, a_stride=43; sweeps n over [5, 7] (outer) and k over 1, 10, ..., 37
// (inner).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t dim_n = 5; dim_n < 8; ++dim_n) {
    for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(dim_n).k(dim_k).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30420
// Sweeps n over [5, 7], k over 1, 10, ..., 37 and m over [1, 2] (nested in
// that order), with iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t dim_n = 5; dim_n < 8; ++dim_n) {
    for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
      for (uint32_t dim_m = 1; dim_m <= 2; ++dim_m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(dim_m).n(dim_n).k(dim_k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30440
// m=2; sweeps n over 8 and 12 (outer) and k over 1, 10, ..., 37 (inner).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t dim_n = 8; dim_n <= 12; dim_n += 4) {
    for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(dim_n).k(dim_k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30457
// m=2, cn_stride=7; sweeps n over 8 and 12 (outer) and k over 1, 10, ..., 37
// (inner).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t dim_n = 8; dim_n <= 12; dim_n += 4) {
    for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(dim_n).k(dim_k).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30475
// m=2, a_stride=43; sweeps n over 8 and 12 (outer) and k over 1, 10, ..., 37
// (inner).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t dim_n = 8; dim_n <= 12; dim_n += 4) {
    for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(dim_n).k(dim_k).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30493
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over [1, 2] (nested in
// that order), with iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t dim_n = 8; dim_n <= 12; dim_n += 4) {
    for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
      for (uint32_t dim_m = 1; dim_m <= 2; ++dim_m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(dim_m).n(dim_n).k(dim_k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30513
// cm_stride=7; sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 2]
// (nested in that order), with iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t dim_k = 1; dim_k <= 40; dim_k += 9) {
    for (uint32_t dim_n = 1; dim_n <= 4; ++dim_n) {
      for (uint32_t dim_m = 1; dim_m <= 2; ++dim_m) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(dim_m).n(dim_n).k(dim_k).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30534
// m=2, n=4, k=8 with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);   // micro-kernel tile parameters
  tester.m(2).n(4).k(8).qmin(128);  // problem size and qmin setting
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30548
// m=2, n=4, k=8 with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);   // micro-kernel tile parameters
  tester.m(2).n(4).k(8).qmax(128);  // problem size and qmax setting
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30562
// m=2, n=4, k=8 with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);       // micro-kernel tile parameters
  tester.m(2).n(4).k(8).cm_stride(7);   // problem size and cm stride
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30576 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30577
30578
30579 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// m=3, n=4 with k fixed at 8.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);  // micro-kernel tile parameters
  tester.m(3).n(4).k(8);           // problem size
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30592
// m=3, n=4, k=8 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);       // micro-kernel tile parameters
  tester.m(3).n(4).k(8).cn_stride(7);   // problem size and cn stride
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30606
// m=3, n=4, k=8 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);       // micro-kernel tile parameters
  tester.m(3).n(4).k(8).a_stride(11);   // problem size and A stride
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
30620
// k=8; sweeps n over [1, 4] (outer) and m over [1, 3] (inner), with
// iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t dim_n = 1; dim_n <= 4; ++dim_n) {
    for (uint32_t dim_m = 1; dim_m <= 3; ++dim_m) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(dim_m).n(dim_n).k(8).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30638
// n=4, k=8; sweeps m over [1, 3] with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t dim_m = 1; dim_m <= 3; ++dim_m) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(dim_m).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30654
// m=3, k=8; sweeps n over [1, 4] with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t dim_n = 1; dim_n <= 4; ++dim_n) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(dim_n).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30670
// m=3, n=4; sweeps k over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t dim_k = 1; dim_k < 8; ++dim_k) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(dim_k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30685
// m=3, n=4, a_stride=11; sweeps k over [1, 7].
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t dim_k = 1; dim_k < 8; ++dim_k) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(dim_k).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30701
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 3] (nested in that
// order), with iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t dim_k = 1; dim_k < 8; ++dim_k) {
    for (uint32_t dim_n = 1; dim_n <= 4; ++dim_n) {
      for (uint32_t dim_m = 1; dim_m <= 3; ++dim_m) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(dim_m).n(dim_n).k(dim_k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30721
// m=3, n=4; sweeps k over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t dim_k = 9; dim_k < 16; ++dim_k) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(dim_k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30736
// m=3, n=4, a_stride=19; sweeps k over [9, 15].
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  for (size_t dim_k = 9; dim_k < 16; ++dim_k) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(dim_k).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30752
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 3] (nested in that
// order), with iterations set to 1 for every combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t dim_k = 9; dim_k < 16; ++dim_k) {
    for (uint32_t dim_n = 1; dim_n <= 4; ++dim_n) {
      for (uint32_t dim_m = 1; dim_m <= 3; ++dim_m) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(dim_m).n(dim_n).k(dim_k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30772
// m=3, n=4; sweeps k over 16, 24, ..., 80 (step 8).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t dim_k = 16; dim_k <= 80; dim_k += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(dim_k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30787
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX;
  // m = 3, n = 4, k = 16, 24, ..., 80, with a_stride fixed at 83.
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
30803
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // Every (m, n) with m in [1, 3] and n in [1, 4], for k = 16, 24, ..., 80;
  // a single iteration per combination.
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30823
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  // m = 3 with n = 5, 6, 7 and k = 1, 10, 19, 28, 37.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30840
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // m = 3 with n = 5, 6, 7 and k = 1, 10, 19, 28, 37; cn_stride fixed at 7.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30858
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  // m = 3 with n = 5, 6, 7 and k = 1, 10, 19, 28, 37; a_stride fixed at 43.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30876
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  // n = 5, 6, 7 and k = 1, 10, 19, 28, 37, each with m in [1, 3];
  // a single iteration per combination.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30896
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  // m = 3 with n = 8 and 12, and k = 1, 10, 19, 28, 37.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30913
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // m = 3 with n = 8 and 12, and k = 1, 10, 19, 28, 37; cn_stride fixed at 7.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30931
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_AVX;
  // m = 3 with n = 8 and 12, and k = 1, 10, 19, 28, 37; a_stride fixed at 43.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30949
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  // n = 8 and 12, k = 1, 10, 19, 28, 37, each with m in [1, 3];
  // a single iteration per combination.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30969
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  // k = 1, 10, 19, 28, 37 with every (m, n) for m in [1, 3] and n in [1, 4];
  // cm_stride fixed at 7, a single iteration per combination.
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30990
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  // Single m = 3, n = 4, k = 8 case with qmin set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31004
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  // Single m = 3, n = 4, k = 8 case with qmax set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31018
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  // Single m = 3, n = 4, k = 8 case with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31032 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
31033
31034
31035 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  // Single m = 3, n = 4, k = 8 case.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31048
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // Single m = 3, n = 4, k = 8 case with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31062
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  // Single m = 3, n = 4, k = 8 case with a_stride set to 11.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31076
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k = 8 with every (m, n) for m in [1, 3] and n in [1, 4];
  // a single iteration per combination.
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 3; ++m) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31094
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  // n = 4, k = 8, with m in [1, 3]; a single iteration per m.
  for (uint32_t m = 1; m <= 3; ++m) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31110
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  // m = 3, k = 8, with n in [1, 4]; a single iteration per n.
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(n).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31126
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  // m = 3, n = 4, once for every k in [1, 7].
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31141
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  // m = 3, n = 4, k in [1, 7], with a_stride fixed at 11.
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31157
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every (m, n) with m in [1, 3] and n in [1, 4], for each k in [1, 7];
  // a single iteration per combination.
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31177
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  // m = 3, n = 4, once for every k in [9, 15].
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31192
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  // m = 3, n = 4, k in [9, 15], with a_stride fixed at 19.
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31208
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every (m, n) with m in [1, 3] and n in [1, 4], for each k in [9, 15];
  // a single iteration per combination.
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31228
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  // m = 3, n = 4, with k = 16, 24, ..., 80 (multiples of 8).
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31243
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8_strided_a) {
  TEST_REQUIRES_X86_XOP;
  // m = 3, n = 4, k = 16, 24, ..., 80, with a_stride fixed at 83.
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31259
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every (m, n) with m in [1, 3] and n in [1, 4], for k = 16, 24, ..., 80;
  // a single iteration per combination.
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31279
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  // m = 3 with n = 5, 6, 7 and k = 1, 10, 19, 28, 37.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31296
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // m = 3 with n = 5, 6, 7 and k = 1, 10, 19, 28, 37; cn_stride fixed at 7.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31314
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  // m = 3 with n = 5, 6, 7 and k = 1, 10, 19, 28, 37; a_stride fixed at 43.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31332
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n = 5, 6, 7 and k = 1, 10, 19, 28, 37, each with m in [1, 3];
  // a single iteration per combination.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31352
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  // m = 3 with n = 8 and 12, and k = 1, 10, 19, 28, 37.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31369
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // m = 3 with n = 8 and 12, and k = 1, 10, 19, 28, 37; cn_stride fixed at 7.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31387
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_strided_a) {
  TEST_REQUIRES_X86_XOP;
  // m = 3 with n = 8 and 12, and k = 1, 10, 19, 28, 37; a_stride fixed at 43.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n).k(k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31405
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n = 8 and 12, k = 1, 10, 19, 28, 37, each with m in [1, 3];
  // a single iteration per combination.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31425
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k = 1, 10, 19, 28, 37 with every (m, n) for m in [1, 3] and n in [1, 4];
  // cm_stride fixed at 7, a single iteration per combination.
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 3; ++m) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31446
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  // Single m = 3, n = 4, k = 8 case with qmin set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31460
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  // Single m = 3, n = 4, k = 8 case with qmax set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31474
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  // Single m = 3, n = 4, k = 8 case with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31488 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
31489
31490
31491 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single m = 2, n = 4, k = 8 case.
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
31504
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Single m = 2, n = 4, k = 8 case with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
31518
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // Single m = 2, n = 4, k = 8 case with a_stride set to 11.
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
31532
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 8 with every (m, n) for m in [1, 2] and n in [1, 4];
  // a single iteration per combination.
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31550
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // n = 4, k = 8, with m in [1, 2]; a single iteration per m.
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(m).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
31566
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // m = 2, k = 8, with n in [1, 4]; a single iteration per n.
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(n).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
31582
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // m = 2, n = 4, once for every k in [1, 7].
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
31597
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // m = 2, n = 4, k in [1, 7], with a_stride fixed at 11.
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
31613
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every (m, n) with m in [1, 2] and n in [1, 4], for each k in [1, 7];
  // a single iteration per combination.
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31633
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // m = 2, n = 4, once for every k in [9, 15].
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
31648
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // m = 2, n = 4, k in [9, 15], with a_stride fixed at 19.
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
31664
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every (m, n) with m in [1, 2] and n in [1, 4], for each k in [9, 15];
  // a single iteration per combination.
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31684
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // m = 2, n = 4, with k = 16, 24, ..., 80 (multiples of 8).
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
31699
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // m = 2, n = 4, k = 16, 24, ..., 80, with a_stride fixed at 83.
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
31715
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // Every (m, n) with m in [1, 2] and n in [1, 4], for k = 16, 24, ..., 80;
  // a single iteration per combination.
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31735
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // m = 2 with n = 5, 6, 7 and k = 1, 10, 19, 28, 37.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n).k(k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31752
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // m = 2 with n = 5, 6, 7 and k = 1, 10, 19, 28, 37; cn_stride fixed at 7.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31770
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_strided_a) {
  TEST_REQUIRES_X86_SSE2;
  // m = 2 with n = 5, 6, 7 and k = 1, 10, 19, 28, 37; a_stride fixed at 43.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n).k(k)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31788
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n = 5, 6, 7 and k = 1, 10, 19, 28, 37, each with m in [1, 2];
  // a single iteration per combination.
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31808
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  // m = 2 with n = 8 and 12, and k = 1, 10, 19, 28, 37.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n).k(k)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31825
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // m = 2 with n = 8 and 12, and k = 1, 10, 19, 28, 37; cn_stride fixed at 7.
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31843
// Sweeps n = 8 and 12 and k = 1, 10, 19, 28, 37 on the full m = 2 tile with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128,n_div_4_strided_a)31844 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_strided_a) {
31845 TEST_REQUIRES_X86_SSE2;
31846 for (uint32_t n = 8; n <= 12; n += 4) {
31847 for (size_t k = 1; k <= 40; k += 9) {
31848 GemmMicrokernelTester()
31849 .mr(2)
31850 .nr(4)
31851 .kr(8)
31852 .sr(1)
31853 .m(2)
31854 .n(n)
31855 .k(k)
31856 .a_stride(43)
31857 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
31858 }
31859 }
31860 }
31861
// Sweeps n = 8 and 12, k = 1, 10, 19, 28, 37 and m = 1..2, with iterations(1) per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128,n_div_4_subtile)31862 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_subtile) {
31863 TEST_REQUIRES_X86_SSE2;
31864 for (uint32_t n = 8; n <= 12; n += 4) {
31865 for (size_t k = 1; k <= 40; k += 9) {
31866 for (uint32_t m = 1; m <= 2; m++) {
31867 GemmMicrokernelTester()
31868 .mr(2)
31869 .nr(4)
31870 .kr(8)
31871 .sr(1)
31872 .m(m)
31873 .n(n)
31874 .k(k)
31875 .iterations(1)
31876 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
31877 }
31878 }
31879 }
31880 }
31881
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..2 with cm_stride(7) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128,strided_cm_subtile)31882 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm_subtile) {
31883 TEST_REQUIRES_X86_SSE2;
31884 for (size_t k = 1; k <= 40; k += 9) {
31885 for (uint32_t n = 1; n <= 4; n++) {
31886 for (uint32_t m = 1; m <= 2; m++) {
31887 GemmMicrokernelTester()
31888 .mr(2)
31889 .nr(4)
31890 .kr(8)
31891 .sr(1)
31892 .m(m)
31893 .n(n)
31894 .k(k)
31895 .cm_stride(7)
31896 .iterations(1)
31897 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
31898 }
31899 }
31900 }
31901 }
31902
// Single full-tile run (m = 2, n = 4, k = 8) with qmin(128).
// NOTE(review): presumably exercises the lower output clamp - confirm against GemmMicrokernelTester.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128,qmin)31903 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmin) {
31904 TEST_REQUIRES_X86_SSE2;
31905 GemmMicrokernelTester()
31906 .mr(2)
31907 .nr(4)
31908 .kr(8)
31909 .sr(1)
31910 .m(2)
31911 .n(4)
31912 .k(8)
31913 .qmin(128)
31914 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
31915 }
31916
// Single full-tile run (m = 2, n = 4, k = 8) with qmax(128).
// NOTE(review): presumably exercises the upper output clamp - confirm against GemmMicrokernelTester.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128,qmax)31917 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmax) {
31918 TEST_REQUIRES_X86_SSE2;
31919 GemmMicrokernelTester()
31920 .mr(2)
31921 .nr(4)
31922 .kr(8)
31923 .sr(1)
31924 .m(2)
31925 .n(4)
31926 .k(8)
31927 .qmax(128)
31928 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
31929 }
31930
// Single full-tile run (m = 2, n = 4, k = 8) with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128,strided_cm)31931 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm) {
31932 TEST_REQUIRES_X86_SSE2;
31933 GemmMicrokernelTester()
31934 .mr(2)
31935 .nr(4)
31936 .kr(8)
31937 .sr(1)
31938 .m(2)
31939 .n(4)
31940 .k(8)
31941 .cm_stride(7)
31942 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, xnn_init_qc8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
31943 }
31944 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
31945
31946
31947 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full-tile run (m = 2, n = 4, k = 8) with no optional tester settings.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,k_eq_8)31948 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8) {
31949 TEST_REQUIRES_X86_SSE41;
31950 GemmMicrokernelTester()
31951 .mr(2)
31952 .nr(4)
31953 .kr(8)
31954 .sr(1)
31955 .m(2)
31956 .n(4)
31957 .k(8)
31958 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
31959 }
31960
// Single full-tile run (m = 2, n = 4, k = 8) with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,strided_cn)31961 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cn) {
31962 TEST_REQUIRES_X86_SSE41;
31963 GemmMicrokernelTester()
31964 .mr(2)
31965 .nr(4)
31966 .kr(8)
31967 .sr(1)
31968 .m(2)
31969 .n(4)
31970 .k(8)
31971 .cn_stride(7)
31972 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
31973 }
31974
// Single full-tile run (m = 2, n = 4, k = 8) with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,k_eq_8_strided_a)31975 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_strided_a) {
31976 TEST_REQUIRES_X86_SSE41;
31977 GemmMicrokernelTester()
31978 .mr(2)
31979 .nr(4)
31980 .kr(8)
31981 .sr(1)
31982 .m(2)
31983 .n(4)
31984 .k(8)
31985 .a_stride(11)
31986 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
31987 }
31988
// Runs every (m, n) with m = 1..2 and n = 1..4 at k = 8, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,k_eq_8_subtile)31989 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile) {
31990 TEST_REQUIRES_X86_SSE41;
31991 for (uint32_t n = 1; n <= 4; n++) {
31992 for (uint32_t m = 1; m <= 2; m++) {
31993 GemmMicrokernelTester()
31994 .mr(2)
31995 .nr(4)
31996 .kr(8)
31997 .sr(1)
31998 .m(m)
31999 .n(n)
32000 .k(8)
32001 .iterations(1)
32002 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32003 }
32004 }
32005 }
32006
// Varies only m = 1..2 at fixed n = 4, k = 8, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,k_eq_8_subtile_m)32007 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_m) {
32008 TEST_REQUIRES_X86_SSE41;
32009 for (uint32_t m = 1; m <= 2; m++) {
32010 GemmMicrokernelTester()
32011 .mr(2)
32012 .nr(4)
32013 .kr(8)
32014 .sr(1)
32015 .m(m)
32016 .n(4)
32017 .k(8)
32018 .iterations(1)
32019 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32020 }
32021 }
32022
// Varies only n = 1..4 at fixed m = 2, k = 8, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,k_eq_8_subtile_n)32023 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_eq_8_subtile_n) {
32024 TEST_REQUIRES_X86_SSE41;
32025 for (uint32_t n = 1; n <= 4; n++) {
32026 GemmMicrokernelTester()
32027 .mr(2)
32028 .nr(4)
32029 .kr(8)
32030 .sr(1)
32031 .m(2)
32032 .n(n)
32033 .k(8)
32034 .iterations(1)
32035 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32036 }
32037 }
32038
// Sweeps k = 1..7 (below kr = 8) on the full m = 2, n = 4 tile.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,k_lt_8)32039 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8) {
32040 TEST_REQUIRES_X86_SSE41;
32041 for (size_t k = 1; k < 8; k++) {
32042 GemmMicrokernelTester()
32043 .mr(2)
32044 .nr(4)
32045 .kr(8)
32046 .sr(1)
32047 .m(2)
32048 .n(4)
32049 .k(k)
32050 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32051 }
32052 }
32053
// Sweeps k = 1..7 on the full m = 2, n = 4 tile with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,k_lt_8_strided_a)32054 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8_strided_a) {
32055 TEST_REQUIRES_X86_SSE41;
32056 for (size_t k = 1; k < 8; k++) {
32057 GemmMicrokernelTester()
32058 .mr(2)
32059 .nr(4)
32060 .kr(8)
32061 .sr(1)
32062 .m(2)
32063 .n(4)
32064 .k(k)
32065 .a_stride(11)
32066 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32067 }
32068 }
32069
// Sweeps k = 1..7, n = 1..4 and m = 1..2, with iterations(1) per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,k_lt_8_subtile)32070 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_lt_8_subtile) {
32071 TEST_REQUIRES_X86_SSE41;
32072 for (size_t k = 1; k < 8; k++) {
32073 for (uint32_t n = 1; n <= 4; n++) {
32074 for (uint32_t m = 1; m <= 2; m++) {
32075 GemmMicrokernelTester()
32076 .mr(2)
32077 .nr(4)
32078 .kr(8)
32079 .sr(1)
32080 .m(m)
32081 .n(n)
32082 .k(k)
32083 .iterations(1)
32084 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32085 }
32086 }
32087 }
32088 }
32089
// Sweeps k = 9..15 (between one and two multiples of kr = 8) on the full m = 2, n = 4 tile.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,k_gt_8)32090 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8) {
32091 TEST_REQUIRES_X86_SSE41;
32092 for (size_t k = 9; k < 16; k++) {
32093 GemmMicrokernelTester()
32094 .mr(2)
32095 .nr(4)
32096 .kr(8)
32097 .sr(1)
32098 .m(2)
32099 .n(4)
32100 .k(k)
32101 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32102 }
32103 }
32104
// Sweeps k = 9..15 on the full m = 2, n = 4 tile with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,k_gt_8_strided_a)32105 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8_strided_a) {
32106 TEST_REQUIRES_X86_SSE41;
32107 for (size_t k = 9; k < 16; k++) {
32108 GemmMicrokernelTester()
32109 .mr(2)
32110 .nr(4)
32111 .kr(8)
32112 .sr(1)
32113 .m(2)
32114 .n(4)
32115 .k(k)
32116 .a_stride(19)
32117 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32118 }
32119 }
32120
// Sweeps k = 9..15, n = 1..4 and m = 1..2, with iterations(1) per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,k_gt_8_subtile)32121 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_gt_8_subtile) {
32122 TEST_REQUIRES_X86_SSE41;
32123 for (size_t k = 9; k < 16; k++) {
32124 for (uint32_t n = 1; n <= 4; n++) {
32125 for (uint32_t m = 1; m <= 2; m++) {
32126 GemmMicrokernelTester()
32127 .mr(2)
32128 .nr(4)
32129 .kr(8)
32130 .sr(1)
32131 .m(m)
32132 .n(n)
32133 .k(k)
32134 .iterations(1)
32135 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32136 }
32137 }
32138 }
32139 }
32140
// Sweeps k = 16, 24, ..., 80 (multiples of kr = 8) on the full m = 2, n = 4 tile.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,k_div_8)32141 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8) {
32142 TEST_REQUIRES_X86_SSE41;
32143 for (size_t k = 16; k <= 80; k += 8) {
32144 GemmMicrokernelTester()
32145 .mr(2)
32146 .nr(4)
32147 .kr(8)
32148 .sr(1)
32149 .m(2)
32150 .n(4)
32151 .k(k)
32152 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32153 }
32154 }
32155
// Sweeps k = 16, 24, ..., 80 on the full m = 2, n = 4 tile with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,k_div_8_strided_a)32156 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8_strided_a) {
32157 TEST_REQUIRES_X86_SSE41;
32158 for (size_t k = 16; k <= 80; k += 8) {
32159 GemmMicrokernelTester()
32160 .mr(2)
32161 .nr(4)
32162 .kr(8)
32163 .sr(1)
32164 .m(2)
32165 .n(4)
32166 .k(k)
32167 .a_stride(83)
32168 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32169 }
32170 }
32171
// Sweeps k = 16, 24, ..., 80, n = 1..4 and m = 1..2, with iterations(1) per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,k_div_8_subtile)32172 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, k_div_8_subtile) {
32173 TEST_REQUIRES_X86_SSE41;
32174 for (size_t k = 16; k <= 80; k += 8) {
32175 for (uint32_t n = 1; n <= 4; n++) {
32176 for (uint32_t m = 1; m <= 2; m++) {
32177 GemmMicrokernelTester()
32178 .mr(2)
32179 .nr(4)
32180 .kr(8)
32181 .sr(1)
32182 .m(m)
32183 .n(n)
32184 .k(k)
32185 .iterations(1)
32186 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32187 }
32188 }
32189 }
32190 }
32191
// Sweeps n = 5..7 (above nr = 4) and k = 1, 10, 19, 28, 37 on the full m = 2 tile.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,n_gt_4)32192 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4) {
32193 TEST_REQUIRES_X86_SSE41;
32194 for (uint32_t n = 5; n < 8; n++) {
32195 for (size_t k = 1; k <= 40; k += 9) {
32196 GemmMicrokernelTester()
32197 .mr(2)
32198 .nr(4)
32199 .kr(8)
32200 .sr(1)
32201 .m(2)
32202 .n(n)
32203 .k(k)
32204 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32205 }
32206 }
32207 }
32208
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 on the full m = 2 tile with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,n_gt_4_strided_cn)32209 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_strided_cn) {
32210 TEST_REQUIRES_X86_SSE41;
32211 for (uint32_t n = 5; n < 8; n++) {
32212 for (size_t k = 1; k <= 40; k += 9) {
32213 GemmMicrokernelTester()
32214 .mr(2)
32215 .nr(4)
32216 .kr(8)
32217 .sr(1)
32218 .m(2)
32219 .n(n)
32220 .k(k)
32221 .cn_stride(7)
32222 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32223 }
32224 }
32225 }
32226
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 on the full m = 2 tile with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,n_gt_4_strided_a)32227 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_strided_a) {
32228 TEST_REQUIRES_X86_SSE41;
32229 for (uint32_t n = 5; n < 8; n++) {
32230 for (size_t k = 1; k <= 40; k += 9) {
32231 GemmMicrokernelTester()
32232 .mr(2)
32233 .nr(4)
32234 .kr(8)
32235 .sr(1)
32236 .m(2)
32237 .n(n)
32238 .k(k)
32239 .a_stride(43)
32240 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32241 }
32242 }
32243 }
32244
// Sweeps n = 5..7, k = 1, 10, 19, 28, 37 and m = 1..2, with iterations(1) per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,n_gt_4_subtile)32245 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_gt_4_subtile) {
32246 TEST_REQUIRES_X86_SSE41;
32247 for (uint32_t n = 5; n < 8; n++) {
32248 for (size_t k = 1; k <= 40; k += 9) {
32249 for (uint32_t m = 1; m <= 2; m++) {
32250 GemmMicrokernelTester()
32251 .mr(2)
32252 .nr(4)
32253 .kr(8)
32254 .sr(1)
32255 .m(m)
32256 .n(n)
32257 .k(k)
32258 .iterations(1)
32259 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32260 }
32261 }
32262 }
32263 }
32264
// Sweeps n = 8 and 12 (multiples of nr = 4) and k = 1, 10, 19, 28, 37 on the full m = 2 tile.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,n_div_4)32265 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4) {
32266 TEST_REQUIRES_X86_SSE41;
32267 for (uint32_t n = 8; n <= 12; n += 4) {
32268 for (size_t k = 1; k <= 40; k += 9) {
32269 GemmMicrokernelTester()
32270 .mr(2)
32271 .nr(4)
32272 .kr(8)
32273 .sr(1)
32274 .m(2)
32275 .n(n)
32276 .k(k)
32277 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32278 }
32279 }
32280 }
32281
// Sweeps n = 8 and 12 and k = 1, 10, 19, 28, 37 on the full m = 2 tile with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,n_div_4_strided_cn)32282 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_strided_cn) {
32283 TEST_REQUIRES_X86_SSE41;
32284 for (uint32_t n = 8; n <= 12; n += 4) {
32285 for (size_t k = 1; k <= 40; k += 9) {
32286 GemmMicrokernelTester()
32287 .mr(2)
32288 .nr(4)
32289 .kr(8)
32290 .sr(1)
32291 .m(2)
32292 .n(n)
32293 .k(k)
32294 .cn_stride(7)
32295 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32296 }
32297 }
32298 }
32299
// Sweeps n = 8 and 12 and k = 1, 10, 19, 28, 37 on the full m = 2 tile with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,n_div_4_strided_a)32300 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_strided_a) {
32301 TEST_REQUIRES_X86_SSE41;
32302 for (uint32_t n = 8; n <= 12; n += 4) {
32303 for (size_t k = 1; k <= 40; k += 9) {
32304 GemmMicrokernelTester()
32305 .mr(2)
32306 .nr(4)
32307 .kr(8)
32308 .sr(1)
32309 .m(2)
32310 .n(n)
32311 .k(k)
32312 .a_stride(43)
32313 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32314 }
32315 }
32316 }
32317
// Sweeps n = 8 and 12, k = 1, 10, 19, 28, 37 and m = 1..2, with iterations(1) per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,n_div_4_subtile)32318 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, n_div_4_subtile) {
32319 TEST_REQUIRES_X86_SSE41;
32320 for (uint32_t n = 8; n <= 12; n += 4) {
32321 for (size_t k = 1; k <= 40; k += 9) {
32322 for (uint32_t m = 1; m <= 2; m++) {
32323 GemmMicrokernelTester()
32324 .mr(2)
32325 .nr(4)
32326 .kr(8)
32327 .sr(1)
32328 .m(m)
32329 .n(n)
32330 .k(k)
32331 .iterations(1)
32332 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32333 }
32334 }
32335 }
32336 }
32337
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..2 with cm_stride(7) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,strided_cm_subtile)32338 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm_subtile) {
32339 TEST_REQUIRES_X86_SSE41;
32340 for (size_t k = 1; k <= 40; k += 9) {
32341 for (uint32_t n = 1; n <= 4; n++) {
32342 for (uint32_t m = 1; m <= 2; m++) {
32343 GemmMicrokernelTester()
32344 .mr(2)
32345 .nr(4)
32346 .kr(8)
32347 .sr(1)
32348 .m(m)
32349 .n(n)
32350 .k(k)
32351 .cm_stride(7)
32352 .iterations(1)
32353 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32354 }
32355 }
32356 }
32357 }
32358
// Single full-tile run (m = 2, n = 4, k = 8) with qmin(128).
// NOTE(review): presumably exercises the lower output clamp - confirm against GemmMicrokernelTester.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,qmin)32359 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmin) {
32360 TEST_REQUIRES_X86_SSE41;
32361 GemmMicrokernelTester()
32362 .mr(2)
32363 .nr(4)
32364 .kr(8)
32365 .sr(1)
32366 .m(2)
32367 .n(4)
32368 .k(8)
32369 .qmin(128)
32370 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32371 }
32372
// Single full-tile run (m = 2, n = 4, k = 8) with qmax(128).
// NOTE(review): presumably exercises the upper output clamp - confirm against GemmMicrokernelTester.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,qmax)32373 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, qmax) {
32374 TEST_REQUIRES_X86_SSE41;
32375 GemmMicrokernelTester()
32376 .mr(2)
32377 .nr(4)
32378 .kr(8)
32379 .sr(1)
32380 .m(2)
32381 .n(4)
32382 .k(8)
32383 .qmax(128)
32384 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32385 }
32386
// Single full-tile run (m = 2, n = 4, k = 8) with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128,strided_cm)32387 TEST(QC8_GEMM_MINMAX_FP32_2X4C8__SSE41_LD128, strided_cm) {
32388 TEST_REQUIRES_X86_SSE41;
32389 GemmMicrokernelTester()
32390 .mr(2)
32391 .nr(4)
32392 .kr(8)
32393 .sr(1)
32394 .m(2)
32395 .n(4)
32396 .k(8)
32397 .cm_stride(7)
32398 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32399 }
32400 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
32401
32402
32403 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full-tile run (m = 3, n = 4, k = 8) with no optional tester settings.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,k_eq_8)32404 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8) {
32405 TEST_REQUIRES_X86_SSE41;
32406 GemmMicrokernelTester()
32407 .mr(3)
32408 .nr(4)
32409 .kr(8)
32410 .sr(1)
32411 .m(3)
32412 .n(4)
32413 .k(8)
32414 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32415 }
32416
// Single full-tile run (m = 3, n = 4, k = 8) with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,strided_cn)32417 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cn) {
32418 TEST_REQUIRES_X86_SSE41;
32419 GemmMicrokernelTester()
32420 .mr(3)
32421 .nr(4)
32422 .kr(8)
32423 .sr(1)
32424 .m(3)
32425 .n(4)
32426 .k(8)
32427 .cn_stride(7)
32428 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32429 }
32430
// Single full-tile run (m = 3, n = 4, k = 8) with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,k_eq_8_strided_a)32431 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_strided_a) {
32432 TEST_REQUIRES_X86_SSE41;
32433 GemmMicrokernelTester()
32434 .mr(3)
32435 .nr(4)
32436 .kr(8)
32437 .sr(1)
32438 .m(3)
32439 .n(4)
32440 .k(8)
32441 .a_stride(11)
32442 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32443 }
32444
// Runs every (m, n) with m = 1..3 and n = 1..4 at k = 8, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,k_eq_8_subtile)32445 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile) {
32446 TEST_REQUIRES_X86_SSE41;
32447 for (uint32_t n = 1; n <= 4; n++) {
32448 for (uint32_t m = 1; m <= 3; m++) {
32449 GemmMicrokernelTester()
32450 .mr(3)
32451 .nr(4)
32452 .kr(8)
32453 .sr(1)
32454 .m(m)
32455 .n(n)
32456 .k(8)
32457 .iterations(1)
32458 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32459 }
32460 }
32461 }
32462
// Varies only m = 1..3 at fixed n = 4, k = 8, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,k_eq_8_subtile_m)32463 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_m) {
32464 TEST_REQUIRES_X86_SSE41;
32465 for (uint32_t m = 1; m <= 3; m++) {
32466 GemmMicrokernelTester()
32467 .mr(3)
32468 .nr(4)
32469 .kr(8)
32470 .sr(1)
32471 .m(m)
32472 .n(4)
32473 .k(8)
32474 .iterations(1)
32475 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32476 }
32477 }
32478
// Varies only n = 1..4 at fixed m = 3, k = 8, with iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,k_eq_8_subtile_n)32479 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_n) {
32480 TEST_REQUIRES_X86_SSE41;
32481 for (uint32_t n = 1; n <= 4; n++) {
32482 GemmMicrokernelTester()
32483 .mr(3)
32484 .nr(4)
32485 .kr(8)
32486 .sr(1)
32487 .m(3)
32488 .n(n)
32489 .k(8)
32490 .iterations(1)
32491 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32492 }
32493 }
32494
// Sweeps k = 1..7 (below kr = 8) on the full m = 3, n = 4 tile.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,k_lt_8)32495 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8) {
32496 TEST_REQUIRES_X86_SSE41;
32497 for (size_t k = 1; k < 8; k++) {
32498 GemmMicrokernelTester()
32499 .mr(3)
32500 .nr(4)
32501 .kr(8)
32502 .sr(1)
32503 .m(3)
32504 .n(4)
32505 .k(k)
32506 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32507 }
32508 }
32509
// Sweeps k = 1..7 on the full m = 3, n = 4 tile with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,k_lt_8_strided_a)32510 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8_strided_a) {
32511 TEST_REQUIRES_X86_SSE41;
32512 for (size_t k = 1; k < 8; k++) {
32513 GemmMicrokernelTester()
32514 .mr(3)
32515 .nr(4)
32516 .kr(8)
32517 .sr(1)
32518 .m(3)
32519 .n(4)
32520 .k(k)
32521 .a_stride(11)
32522 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32523 }
32524 }
32525
// Sweeps k = 1..7, n = 1..4 and m = 1..3, with iterations(1) per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,k_lt_8_subtile)32526 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8_subtile) {
32527 TEST_REQUIRES_X86_SSE41;
32528 for (size_t k = 1; k < 8; k++) {
32529 for (uint32_t n = 1; n <= 4; n++) {
32530 for (uint32_t m = 1; m <= 3; m++) {
32531 GemmMicrokernelTester()
32532 .mr(3)
32533 .nr(4)
32534 .kr(8)
32535 .sr(1)
32536 .m(m)
32537 .n(n)
32538 .k(k)
32539 .iterations(1)
32540 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32541 }
32542 }
32543 }
32544 }
32545
// Sweeps k = 9..15 (between one and two multiples of kr = 8) on the full m = 3, n = 4 tile.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,k_gt_8)32546 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8) {
32547 TEST_REQUIRES_X86_SSE41;
32548 for (size_t k = 9; k < 16; k++) {
32549 GemmMicrokernelTester()
32550 .mr(3)
32551 .nr(4)
32552 .kr(8)
32553 .sr(1)
32554 .m(3)
32555 .n(4)
32556 .k(k)
32557 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32558 }
32559 }
32560
// Sweeps k = 9..15 on the full m = 3, n = 4 tile with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,k_gt_8_strided_a)32561 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8_strided_a) {
32562 TEST_REQUIRES_X86_SSE41;
32563 for (size_t k = 9; k < 16; k++) {
32564 GemmMicrokernelTester()
32565 .mr(3)
32566 .nr(4)
32567 .kr(8)
32568 .sr(1)
32569 .m(3)
32570 .n(4)
32571 .k(k)
32572 .a_stride(19)
32573 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32574 }
32575 }
32576
// Sweeps k = 9..15, n = 1..4 and m = 1..3, with iterations(1) per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,k_gt_8_subtile)32577 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8_subtile) {
32578 TEST_REQUIRES_X86_SSE41;
32579 for (size_t k = 9; k < 16; k++) {
32580 for (uint32_t n = 1; n <= 4; n++) {
32581 for (uint32_t m = 1; m <= 3; m++) {
32582 GemmMicrokernelTester()
32583 .mr(3)
32584 .nr(4)
32585 .kr(8)
32586 .sr(1)
32587 .m(m)
32588 .n(n)
32589 .k(k)
32590 .iterations(1)
32591 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32592 }
32593 }
32594 }
32595 }
32596
// Sweeps k = 16, 24, ..., 80 (multiples of kr = 8) on the full m = 3, n = 4 tile.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,k_div_8)32597 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8) {
32598 TEST_REQUIRES_X86_SSE41;
32599 for (size_t k = 16; k <= 80; k += 8) {
32600 GemmMicrokernelTester()
32601 .mr(3)
32602 .nr(4)
32603 .kr(8)
32604 .sr(1)
32605 .m(3)
32606 .n(4)
32607 .k(k)
32608 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32609 }
32610 }
32611
// Sweeps k = 16, 24, ..., 80 on the full m = 3, n = 4 tile with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,k_div_8_strided_a)32612 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8_strided_a) {
32613 TEST_REQUIRES_X86_SSE41;
32614 for (size_t k = 16; k <= 80; k += 8) {
32615 GemmMicrokernelTester()
32616 .mr(3)
32617 .nr(4)
32618 .kr(8)
32619 .sr(1)
32620 .m(3)
32621 .n(4)
32622 .k(k)
32623 .a_stride(83)
32624 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32625 }
32626 }
32627
// Sweeps k = 16, 24, ..., 80, n = 1..4 and m = 1..3, with iterations(1) per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,k_div_8_subtile)32628 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8_subtile) {
32629 TEST_REQUIRES_X86_SSE41;
32630 for (size_t k = 16; k <= 80; k += 8) {
32631 for (uint32_t n = 1; n <= 4; n++) {
32632 for (uint32_t m = 1; m <= 3; m++) {
32633 GemmMicrokernelTester()
32634 .mr(3)
32635 .nr(4)
32636 .kr(8)
32637 .sr(1)
32638 .m(m)
32639 .n(n)
32640 .k(k)
32641 .iterations(1)
32642 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32643 }
32644 }
32645 }
32646 }
32647
// Sweeps n = 5..7 (above nr = 4) and k = 1, 10, 19, 28, 37 on the full m = 3 tile.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,n_gt_4)32648 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4) {
32649 TEST_REQUIRES_X86_SSE41;
32650 for (uint32_t n = 5; n < 8; n++) {
32651 for (size_t k = 1; k <= 40; k += 9) {
32652 GemmMicrokernelTester()
32653 .mr(3)
32654 .nr(4)
32655 .kr(8)
32656 .sr(1)
32657 .m(3)
32658 .n(n)
32659 .k(k)
32660 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32661 }
32662 }
32663 }
32664
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 on the full m = 3 tile with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,n_gt_4_strided_cn)32665 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_strided_cn) {
32666 TEST_REQUIRES_X86_SSE41;
32667 for (uint32_t n = 5; n < 8; n++) {
32668 for (size_t k = 1; k <= 40; k += 9) {
32669 GemmMicrokernelTester()
32670 .mr(3)
32671 .nr(4)
32672 .kr(8)
32673 .sr(1)
32674 .m(3)
32675 .n(n)
32676 .k(k)
32677 .cn_stride(7)
32678 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32679 }
32680 }
32681 }
32682
// Sweeps n = 5..7 and k = 1, 10, 19, 28, 37 on the full m = 3 tile with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,n_gt_4_strided_a)32683 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_strided_a) {
32684 TEST_REQUIRES_X86_SSE41;
32685 for (uint32_t n = 5; n < 8; n++) {
32686 for (size_t k = 1; k <= 40; k += 9) {
32687 GemmMicrokernelTester()
32688 .mr(3)
32689 .nr(4)
32690 .kr(8)
32691 .sr(1)
32692 .m(3)
32693 .n(n)
32694 .k(k)
32695 .a_stride(43)
32696 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32697 }
32698 }
32699 }
32700
// Sweeps n = 5..7, k = 1, 10, 19, 28, 37 and m = 1..3, with iterations(1) per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,n_gt_4_subtile)32701 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_subtile) {
32702 TEST_REQUIRES_X86_SSE41;
32703 for (uint32_t n = 5; n < 8; n++) {
32704 for (size_t k = 1; k <= 40; k += 9) {
32705 for (uint32_t m = 1; m <= 3; m++) {
32706 GemmMicrokernelTester()
32707 .mr(3)
32708 .nr(4)
32709 .kr(8)
32710 .sr(1)
32711 .m(m)
32712 .n(n)
32713 .k(k)
32714 .iterations(1)
32715 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32716 }
32717 }
32718 }
32719 }
32720
// Sweeps n = 8 and 12 (multiples of nr = 4) and k = 1, 10, 19, 28, 37 on the full m = 3 tile.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,n_div_4)32721 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4) {
32722 TEST_REQUIRES_X86_SSE41;
32723 for (uint32_t n = 8; n <= 12; n += 4) {
32724 for (size_t k = 1; k <= 40; k += 9) {
32725 GemmMicrokernelTester()
32726 .mr(3)
32727 .nr(4)
32728 .kr(8)
32729 .sr(1)
32730 .m(3)
32731 .n(n)
32732 .k(k)
32733 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32734 }
32735 }
32736 }
32737
// Sweeps n = 8 and 12 and k = 1, 10, 19, 28, 37 on the full m = 3 tile with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,n_div_4_strided_cn)32738 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_strided_cn) {
32739 TEST_REQUIRES_X86_SSE41;
32740 for (uint32_t n = 8; n <= 12; n += 4) {
32741 for (size_t k = 1; k <= 40; k += 9) {
32742 GemmMicrokernelTester()
32743 .mr(3)
32744 .nr(4)
32745 .kr(8)
32746 .sr(1)
32747 .m(3)
32748 .n(n)
32749 .k(k)
32750 .cn_stride(7)
32751 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32752 }
32753 }
32754 }
32755
// Sweeps n = 8 and 12 and k = 1, 10, 19, 28, 37 on the full m = 3 tile with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,n_div_4_strided_a)32756 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_strided_a) {
32757 TEST_REQUIRES_X86_SSE41;
32758 for (uint32_t n = 8; n <= 12; n += 4) {
32759 for (size_t k = 1; k <= 40; k += 9) {
32760 GemmMicrokernelTester()
32761 .mr(3)
32762 .nr(4)
32763 .kr(8)
32764 .sr(1)
32765 .m(3)
32766 .n(n)
32767 .k(k)
32768 .a_stride(43)
32769 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32770 }
32771 }
32772 }
32773
// Sweeps n = 8 and 12, k = 1, 10, 19, 28, 37 and m = 1..3, with iterations(1) per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,n_div_4_subtile)32774 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_subtile) {
32775 TEST_REQUIRES_X86_SSE41;
32776 for (uint32_t n = 8; n <= 12; n += 4) {
32777 for (size_t k = 1; k <= 40; k += 9) {
32778 for (uint32_t m = 1; m <= 3; m++) {
32779 GemmMicrokernelTester()
32780 .mr(3)
32781 .nr(4)
32782 .kr(8)
32783 .sr(1)
32784 .m(m)
32785 .n(n)
32786 .k(k)
32787 .iterations(1)
32788 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32789 }
32790 }
32791 }
32792 }
32793
// Sweeps k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..3 with cm_stride(7) and iterations(1).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,strided_cm_subtile)32794 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm_subtile) {
32795 TEST_REQUIRES_X86_SSE41;
32796 for (size_t k = 1; k <= 40; k += 9) {
32797 for (uint32_t n = 1; n <= 4; n++) {
32798 for (uint32_t m = 1; m <= 3; m++) {
32799 GemmMicrokernelTester()
32800 .mr(3)
32801 .nr(4)
32802 .kr(8)
32803 .sr(1)
32804 .m(m)
32805 .n(n)
32806 .k(k)
32807 .cm_stride(7)
32808 .iterations(1)
32809 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32810 }
32811 }
32812 }
32813 }
32814
// Single full-tile run (m = 3, n = 4, k = 8) with qmin(128).
// NOTE(review): presumably exercises the lower output clamp - confirm against GemmMicrokernelTester.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,qmin)32815 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmin) {
32816 TEST_REQUIRES_X86_SSE41;
32817 GemmMicrokernelTester()
32818 .mr(3)
32819 .nr(4)
32820 .kr(8)
32821 .sr(1)
32822 .m(3)
32823 .n(4)
32824 .k(8)
32825 .qmin(128)
32826 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32827 }
32828
// Single full-tile run (m = 3, n = 4, k = 8) with qmax(128).
// NOTE(review): presumably exercises the upper output clamp - confirm against GemmMicrokernelTester.
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,qmax)32829 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmax) {
32830 TEST_REQUIRES_X86_SSE41;
32831 GemmMicrokernelTester()
32832 .mr(3)
32833 .nr(4)
32834 .kr(8)
32835 .sr(1)
32836 .m(3)
32837 .n(4)
32838 .k(8)
32839 .qmax(128)
32840 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32841 }
32842
// Single full-tile run (m = 3, n = 4, k = 8) with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128,strided_cm)32843 TEST(QC8_GEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm) {
32844 TEST_REQUIRES_X86_SSE41;
32845 GemmMicrokernelTester()
32846 .mr(3)
32847 .nr(4)
32848 .kr(8)
32849 .sr(1)
32850 .m(3)
32851 .n(4)
32852 .k(8)
32853 .cm_stride(7)
32854 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, xnn_init_qc8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
32855 }
32856 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
32857
32858
32859 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full-tile run (m = 1, n = 8, k = 8) with no optional tester settings.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2,k_eq_8)32860 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8) {
32861 TEST_REQUIRES_X86_AVX2;
32862 GemmMicrokernelTester()
32863 .mr(1)
32864 .nr(8)
32865 .kr(8)
32866 .sr(1)
32867 .m(1)
32868 .n(8)
32869 .k(8)
32870 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
32871 }
32872
// Single full-tile run (m = 1, n = 8, k = 8) with cn_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2,strided_cn)32873 TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, strided_cn) {
32874 TEST_REQUIRES_X86_AVX2;
32875 GemmMicrokernelTester()
32876 .mr(1)
32877 .nr(8)
32878 .kr(8)
32879 .sr(1)
32880 .m(1)
32881 .n(8)
32882 .k(8)
32883 .cn_stride(11)
32884 .Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
32885 }
32886
// Runs the 1x8c8 AVX2 kernel on a single m=1, n=8, k=8 case with a_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8).a_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
32900
// At k=8, sweeps n over 1..8 and m over 1..1, running one iteration per (m, n) pair.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8).iterations(1);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
32918
// At n=8, k=8, sweeps m over 1..1, running one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(m_size).n(8).k(8).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
32934
// At m=1, k=8, sweeps n over 1..8, running one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(n_size).k(8).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
32950
// At m=1, n=8, sweeps k over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_size);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
32965
// At m=1, n=8, sweeps k over 1..7 with a_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_size).a_stride(11);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
32981
// Sweeps k over 1..7, n over 1..8 and m over 1..1, running one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33001
// At m=1, n=8, sweeps k over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_size);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
33016
// At m=1, n=8, sweeps k over 9..15 with a_stride overridden to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_size).a_stride(19);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
33032
// Sweeps k over 9..15, n over 1..8 and m over 1..1, running one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33052
// At m=1, n=8, sweeps k over 16..80 in steps of 8.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_size);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
33067
// At m=1, n=8, sweeps k over 16..80 in steps of 8 with a_stride overridden to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_size).a_stride(83);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
33083
// Sweeps k over 16..80 (step 8), n over 1..8 and m over 1..1, running one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33103
// At m=1, sweeps n over 9..15 and k over 1..40 in steps of 9.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33120
// At m=1, sweeps n over 9..15 and k over 1..40 (step 9) with cn_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size).cn_stride(11);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33138
// At m=1, sweeps n over 9..15 and k over 1..40 (step 9) with a_stride overridden to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size).a_stride(43);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33156
// Sweeps n over 9..15, k over 1..40 (step 9) and m over 1..1, running one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33176
// At m=1, sweeps n over 16..24 (step 8) and k over 1..40 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33193
// At m=1, sweeps n over 16..24 (step 8) and k over 1..40 (step 9) with cn_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size).cn_stride(11);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33211
// At m=1, sweeps n over 16..24 (step 8) and k over 1..40 (step 9) with a_stride overridden to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size).a_stride(43);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33229
// Sweeps n over 16..24 (step 8), k over 1..40 (step 9) and m over 1..1, running one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33249
// Sweeps k over 1..40 (step 9), n over 1..8 and m over 1..1 with cm_stride overridden to 11,
// running one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).cm_stride(11).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33270
// Runs the 1x8c8 AVX2 kernel on a single m=1, n=8, k=8 case with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8).qmin(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
33284
// Runs the 1x8c8 AVX2 kernel on a single m=1, n=8, k=8 case with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8).qmax(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
33298
// Runs the 1x8c8 AVX2 kernel on a single m=1, n=8, k=8 case with cm_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8).cm_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
33312 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
33313
33314
33315 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 3x8c8 AVX2 kernel on a single m=3, n=8, k=8 case with no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
33328
// Runs the 3x8c8 AVX2 kernel on a single m=3, n=8, k=8 case with cn_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8).cn_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
33342
// Runs the 3x8c8 AVX2 kernel on a single m=3, n=8, k=8 case with a_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8).a_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
33356
// At k=8, sweeps n over 1..8 and m over 1..3, running one iteration per (m, n) pair.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8).iterations(1);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33374
// At n=8, k=8, sweeps m over 1..3, running one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(m_size).n(8).k(8).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
33390
// At m=3, k=8, sweeps n over 1..8, running one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(n_size).k(8).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
33406
// At m=3, n=8, sweeps k over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
33421
// At m=3, n=8, sweeps k over 1..7 with a_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size).a_stride(11);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
33437
// Sweeps k over 1..7, n over 1..8 and m over 1..3, running one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33457
// At m=3, n=8, sweeps k over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
33472
// At m=3, n=8, sweeps k over 9..15 with a_stride overridden to 19.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size).a_stride(19);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
33488
// Sweeps k over 9..15, n over 1..8 and m over 1..3, running one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33508
// At m=3, n=8, sweeps k over 16..80 in steps of 8.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
33523
// At m=3, n=8, sweeps k over 16..80 in steps of 8 with a_stride overridden to 83.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(8).kr(8).sr(1);
    tester.m(3).n(8).k(k_size).a_stride(83);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
  }
}
33539
// Sweeps k over 16..80 (step 8), n over 1..8 and m over 1..3, running one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33559
// At m=3, sweeps n over 9..15 and k over 1..40 in steps of 9.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33576
// At m=3, sweeps n over 9..15 and k over 1..40 (step 9) with cn_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).cn_stride(11);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33594
// At m=3, sweeps n over 9..15 and k over 1..40 (step 9) with a_stride overridden to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).a_stride(43);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33612
// Sweeps n over 9..15, k over 1..40 (step 9) and m over 1..3, running one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33632
// At m=3, sweeps n over 16..24 (step 8) and k over 1..40 (step 9).
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33649
// At m=3, sweeps n over 16..24 (step 8) and k over 1..40 (step 9) with cn_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).cn_stride(11);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33667
// At m=3, sweeps n over 16..24 (step 8) and k over 1..40 (step 9) with a_stride overridden to 43.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(8).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).a_stride(43);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
    }
  }
}
33685
// Sweeps n over 16..24 (step 8), k over 1..40 (step 9) and m over 1..3, running one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33705
// Sweeps k over 1..40 (step 9), n over 1..8 and m over 1..3 with cm_stride overridden to 11,
// running one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).cm_stride(11).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33726
// Runs the 3x8c8 AVX2 kernel on a single m=3, n=8, k=8 case with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8).qmin(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
33740
// Runs the 3x8c8 AVX2 kernel on a single m=3, n=8, k=8 case with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8).qmax(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
33754
// Runs the 3x8c8 AVX2 kernel on a single m=3, n=8, k=8 case with cm_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(8).kr(8).sr(1);
  tester.m(3).n(8).k(8).cm_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qc8_conv_minmax_fp32_avx2_params, xnn_qs8_requantize_fp32);
}
33768 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
33769
33770
33771 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 4x16c8 AVX512SKX kernel on a single m=4, n=16, k=8 case with no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
}
33784
// Runs the 4x16c8 AVX512SKX kernel on a single m=4, n=16, k=8 case with cn_stride overridden to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8).cn_stride(19);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
}
33798
// Runs the 4x16c8 AVX512SKX kernel on a single m=4, n=16, k=8 case with a_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8).a_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
}
33812
// At k=8, sweeps n over 1..16 and m over 1..4, running one iteration per (m, n) pair.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8).iterations(1);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
    }
  }
}
33830
// At n=16, k=8, sweeps m over 1..4, running one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(m_size).n(16).k(8).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
33846
// At m=4, k=8, sweeps n over 1..16, running one iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(n_size).k(8).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
33862
// At m=4, n=16, sweeps k over 1..7.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
33877
// At m=4, n=16, sweeps k over 1..7 with a_stride overridden to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(k_size).a_stride(11);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
33893
// Sweeps k over 1..7, n over 1..16 and m over 1..4, running one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33913
// At m=4, n=16, sweeps k over 9..15.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(k_size);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
33928
// At m=4, n=16, sweeps k over 9..15 with a_stride overridden to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(k_size).a_stride(19);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
  }
}
33944
// Sweeps k over 9..15, n over 1..16 and m over 1..4, running one iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, xnn_init_qc8_conv_minmax_fp32_avx512_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
33964
// Runs the m = 4, n = 16 shape for k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
33979
// Runs the m = 4, n = 16 shape for k = 16, 24, ..., 80 with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(k).a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
33995
// Runs every m in 1..4 and n in 1..16 for k = 16, 24, ..., 80, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34015
// Runs m = 4 for each n in 17..31, with k stepped from 1 to 40 in increments of 9.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34032
// Runs m = 4 for each n in 17..31 and k = 1, 10, ..., 37 with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(n).k(k).cn_stride(19);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34050
// Runs m = 4 for each n in 17..31 and k = 1, 10, ..., 37 with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(n).k(k).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34068
// Runs every m in 1..4 for each n in 17..31 and k = 1, 10, ..., 37, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34088
// Runs m = 4 for n = 32 and n = 48, with k stepped from 1 to 40 in increments of 9.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34105
// Runs m = 4 for n = 32 and n = 48 and k = 1, 10, ..., 37 with cn_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(n).k(k).cn_stride(19);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34123
// Runs m = 4 for n = 32 and n = 48 and k = 1, 10, ..., 37 with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_strided_a) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(n).k(k).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34141
// Runs every m in 1..4 for n = 32 and n = 48 and k = 1, 10, ..., 37, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34161
// Runs every m in 1..4 and n in 1..16 for k = 1, 10, ..., 37 with cm_stride set to 19,
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(19).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34182
// Runs the m = 4, n = 16, k = 8 shape with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
34196
// Runs the m = 4, n = 16, k = 8 shape with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
34210
// Runs the m = 4, n = 16, k = 8 shape with cm_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8).cm_stride(19);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
34224 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
34225
34226
34227 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the m = 1, n = 4, k = 8 shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34239
// Runs the m = 1, n = 4, k = 8 shape with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34252
// Runs the m = 1, n = 4, k = 8 shape with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8).a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34265
// Runs m = 1 and every n in 1..4 at k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(m).n(n).k(8).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34282
// Sweeps m over 1..1 (a single value) at n = 4, k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 1; ++m) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(m).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34297
// Sweeps n over 1..4 at m = 1, k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; ++n) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(n).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34312
// Runs the m = 1, n = 4 shape for each k in 1..7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34326
// Runs the m = 1, n = 4 shape for each k in 1..7 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34341
// Runs m = 1 and every n in 1..4 for each k in 1..7, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34360
// Runs the m = 1, n = 4 shape for each k in 9..15.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k = 9; k < 16; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34374
// Runs the m = 1, n = 4 shape for each k in 9..15 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
  for (size_t k = 9; k < 16; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34389
// Runs m = 1 and every n in 1..4 for each k in 9..15, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34408
// Runs the m = 1, n = 4 shape for k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34422
// Runs the m = 1, n = 4 shape for k = 16, 24, ..., 80 with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k).a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34437
// Runs m = 1 and every n in 1..4 for k = 16, 24, ..., 80, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34456
// Runs m = 1 for each n in 5..7, with k stepped from 1 to 40 in increments of 9.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34472
// Runs m = 1 for each n in 5..7 and k = 1, 10, ..., 37 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n).k(k).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34489
// Runs m = 1 for each n in 5..7 and k = 1, 10, ..., 37 with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n).k(k).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34506
// Runs m = 1 for each n in 5..7 and k = 1, 10, ..., 37, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34525
// Runs m = 1 for n = 8 and n = 12, with k stepped from 1 to 40 in increments of 9.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34541
// Runs m = 1 for n = 8 and n = 12 and k = 1, 10, ..., 37 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n).k(k).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34558
// Runs m = 1 for n = 8 and n = 12 and k = 1, 10, ..., 37 with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n).k(k).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34575
// Runs m = 1 for n = 8 and n = 12 and k = 1, 10, ..., 37, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34594
// Runs m = 1 and every n in 1..4 for k = 1, 10, ..., 37 with cm_stride set to 7,
// one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m).n(n).k(k);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34614
// Runs the m = 1, n = 4, k = 8 shape with qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34627
// Runs the m = 1, n = 4, k = 8 shape with qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34640
// Runs the m = 1, n = 4, k = 8 shape with cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34653 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
34654
34655
34656 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the m = 1, n = 4, k = 8 shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34668
// Runs the m = 1, n = 4, k = 8 shape with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34681
// Runs the m = 1, n = 4, k = 8 shape with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_strided_a) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8).a_stride(11);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34694
// Runs m = 1 and every n in 1..4 at k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(m).n(n).k(8).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34711
// Sweeps m over 1..1 (a single value) at n = 4, k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 1; ++m) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(m).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34726
// Sweeps n over 1..4 at m = 1, k = 8, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; ++n) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(n).k(8).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34741
// Runs the m = 1, n = 4 shape for each k in 1..7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34755
// Runs the m = 1, n = 4 shape for each k in 1..7 with a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_strided_a) {
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34770
// Runs m = 1 and every n in 1..4 for each k in 1..7, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34789
// Runs the m = 1, n = 4 shape for each k in 9..15.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k = 9; k < 16; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34803
// Runs the m = 1, n = 4 shape for each k in 9..15 with a_stride set to 19.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_strided_a) {
  for (size_t k = 9; k < 16; ++k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k).a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34818
// Runs m = 1 and every n in 1..4 for each k in 9..15, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34837
// Runs the m = 1, n = 4 shape for k = 16, 24, ..., 80.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34851
// Runs the m = 1, n = 4 shape for k = 16, 24, ..., 80 with a_stride set to 83.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_strided_a) {
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k).a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34866
// Runs m = 1 and every n in 1..4 for k = 16, 24, ..., 80, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34885
// Runs m = 1 for each n in 5..7, with k stepped from 1 to 40 in increments of 9.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n).k(k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34901
// Runs m = 1 for each n in 5..7 and k = 1, 10, ..., 37 with cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n).k(k).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34918
// Runs m = 1 for each n in 5..7 and k = 1, 10, ..., 37 with a_stride set to 43.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_a) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n).k(k).a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34935
// Runs m = 1 for each n in 5..7 and k = 1, 10, ..., 37, one iteration per shape.
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34954
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4) {
  // Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m fixed at 1.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34970
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  // Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 1 and cn_stride set to 7.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34987
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_strided_a) {
  // Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 1 and a_stride set to 43.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35004
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  // Sweeps n over 8 and 12, k over 1, 10, 19, 28, 37 and m over 1..1, one iteration per case.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35023
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  // Sweeps k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..1 with cm_stride set to 7.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35043
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, qmin) {
  // Single run at m = 1, n = 4, k = 8 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
35056
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, qmax) {
  // Single run at m = 1, n = 4, k = 8 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
35069
TEST(QC8_GEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD64, strided_cm) {
  // Single run at m = 1, n = 4, k = 8 with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(2).sr(4)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
35082 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
35083
35084
35085 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single run at m = 1, n = 4, k = 8 (m and n match mr and nr).
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
35097
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Single run at m = 1, n = 4, k = 8 with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
35110
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
  // Single run at m = 1, n = 4, k = 8 with a_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
35123
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // Sweeps n over 1..4 and m over 1..1 at k = 8, one iteration per case.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35140
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // Sweeps m over 1..1 at n = 4, k = 8, one iteration per case.
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
35155
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // Sweeps n over 1..4 at m = 1, k = 8, one iteration per case.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
35170
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // Sweeps k over 1..7 at m = 1, n = 4.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
35184
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
  // Sweeps k over 1..7 at m = 1, n = 4 with a_stride set to 11.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
35199
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // Sweeps k over 1..7, n over 1..4 and m over 1..1, one iteration per case.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35218
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // Sweeps k over 9..15 at m = 1, n = 4.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
35232
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
  // Sweeps k over 9..15 at m = 1, n = 4 with a_stride set to 19.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
35247
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // Sweeps k over 9..15, n over 1..4 and m over 1..1, one iteration per case.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35266
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // Sweeps k over 16, 24, ..., 80 at m = 1, n = 4.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
35280
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
  // Sweeps k over 16, 24, ..., 80 at m = 1, n = 4 with a_stride set to 83.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
35295
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..1, one iteration per case.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35314
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m fixed at 1.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35330
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m = 1 and cn_stride set to 7.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35347
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
  // Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m = 1 and a_stride set to 43.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35364
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over 1..1, one iteration per case.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35383
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  // Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m fixed at 1.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35399
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  // Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 1 and cn_stride set to 7.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35416
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
  // Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 1 and a_stride set to 43.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35433
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  // Sweeps n over 8 and 12, k over 1, 10, 19, 28, 37 and m over 1..1, one iteration per case.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35452
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  // Sweeps k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..1 with cm_stride set to 7.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35472
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  // Single run at m = 1, n = 4, k = 8 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
35485
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  // Single run at m = 1, n = 4, k = 8 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
35498
TEST(QC8_GEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  // Single run at m = 1, n = 4, k = 8 with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
35511 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
35512
35513
35514 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single run at m = 2, n = 4, k = 8 (m and n match mr and nr).
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
35526
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Single run at m = 2, n = 4, k = 8 with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
35539
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
  // Single run at m = 2, n = 4, k = 8 with a_stride set to 11.
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .a_stride(11)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
35552
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // Sweeps n over 1..4 and m over 1..2 at k = 8, one iteration per case.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35569
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // Sweeps m over 1..2 at n = 4, k = 8, one iteration per case.
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
35584
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // Sweeps n over 1..4 at m = 2, k = 8, one iteration per case.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
35599
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // Sweeps k over 1..7 at m = 2, n = 4.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
35613
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
  // Sweeps k over 1..7 at m = 2, n = 4 with a_stride set to 11.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_size)
      .a_stride(11)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
35628
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // Sweeps k over 1..7, n over 1..4 and m over 1..2, one iteration per case.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35647
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // Sweeps k over 9..15 at m = 2, n = 4.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
35661
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
  // Sweeps k over 9..15 at m = 2, n = 4 with a_stride set to 19.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_size)
      .a_stride(19)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
35676
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // Sweeps k over 9..15, n over 1..4 and m over 1..2, one iteration per case.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35695
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // Sweeps k over 16, 24, ..., 80 at m = 2, n = 4.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
35709
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
  // Sweeps k over 16, 24, ..., 80 at m = 2, n = 4 with a_stride set to 83.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(k_size)
      .a_stride(83)
      .Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
35724
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..2, one iteration per case.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35743
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m fixed at 2.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35759
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m = 2 and cn_stride set to 7.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35776
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
  // Sweeps n over 5..7 and k over 1, 10, 19, 28, 37 with m = 2 and a_stride set to 43.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35793
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // Sweeps n over 5..7, k over 1, 10, 19, 28, 37 and m over 1..2, one iteration per case.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35812
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  // Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m fixed at 2.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35828
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  // Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 2 and cn_stride set to 7.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35845
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
  // Sweeps n over 8 and 12 and k over 1, 10, 19, 28, 37 with m = 2 and a_stride set to 43.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_size).k(k_size)
        .a_stride(43)
        .Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35862
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  // Sweeps n over 8 and 12, k over 1, 10, 19, 28, 37 and m over 1..2, one iteration per case.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35881
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  // Sweeps k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..2 with cm_stride set to 7.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35901
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  // Single run at m = 2, n = 4, k = 8 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
35914
// Single run at m=2, n=4, k=8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).qmax(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
35927
// Single run at m=2, n=4, k=8 with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
35940 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
35941
35942
35943 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Baseline: single run at m=2, n=4, k=8 with no stride or clamp setters applied.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
35955
// Single run at m=2, n=4, k=8 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).cn_stride(7);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
35968
// Single run at m=2, n=4, k=8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).a_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
35981
// Sweeps n over 1..4 and m over 1..2 at k=8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(mc).n(nc).k(8).iterations(1);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
35998
// Sweeps m over 1..2 at n=4, k=8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(mc).n(4).k(8).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
36013
// Sweeps n over 1..4 at m=2, k=8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(nc).k(8).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
36028
// Sweeps k over 1..7 at m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
36042
// Sweeps k over 1..7 at m=2, n=4 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc).a_stride(11);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
36057
// Sweeps k over 1..7, n over 1..4 and m over 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36076
// Sweeps k over 9..15 at m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
36090
// Sweeps k over 9..15 at m=2, n=4 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc).a_stride(19);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
36105
// Sweeps k over 9..15, n over 1..4 and m over 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36124
// Sweeps k over 16..80 (step 8) at m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
36138
// Sweeps k over 16..80 (step 8) at m=2, n=4 with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc).a_stride(83);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
36153
// Sweeps k over 16..80 (step 8), n over 1..4 and m over 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36172
// Sweeps n over 5..7 and k over 1..40 (step 9) at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
36188
// Sweeps n over 5..7 and k over 1..40 (step 9) at m=2 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc).cn_stride(7);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
36205
// Sweeps n over 5..7 and k over 1..40 (step 9) at m=2 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc).a_stride(43);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
36222
// Sweeps n over 5..7, k over 1..40 (step 9) and m over 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36241
// Sweeps n over {8, 12} and k over 1..40 (step 9) at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
36257
// Sweeps n over {8, 12} and k over 1..40 (step 9) at m=2 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc).cn_stride(7);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
36274
// Sweeps n over {8, 12} and k over 1..40 (step 9) at m=2 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc).a_stride(43);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
36291
// Sweeps n over {8, 12}, k over 1..40 (step 9) and m over 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36310
// Sweeps k over 1..40 (step 9), n over 1..4 and m over 1..2 with cm_stride(7), one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).cm_stride(7).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36330
// Single run at m=2, n=4, k=8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).qmin(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
36343
// Single run at m=2, n=4, k=8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).qmax(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
36356
// Single run at m=2, n=4, k=8 with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
36369 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
36370
36371
36372 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Baseline: single run at m=2, n=4, k=8 with no stride or clamp setters applied.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
36384
// Single run at m=2, n=4, k=8 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).cn_stride(7);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
36397
// Single run at m=2, n=4, k=8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).a_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
36410
// Sweeps n over 1..4 and m over 1..2 at k=8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(mc).n(nc).k(8).iterations(1);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
36427
// Sweeps m over 1..2 at n=4, k=8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(mc).n(4).k(8).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
36442
// Sweeps n over 1..4 at m=2, k=8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(nc).k(8).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
36457
// Sweeps k over 1..7 at m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
36471
// Sweeps k over 1..7 at m=2, n=4 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc).a_stride(11);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
36486
// Sweeps k over 1..7, n over 1..4 and m over 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36505
// Sweeps k over 9..15 at m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
36519
// Sweeps k over 9..15 at m=2, n=4 with a_stride(19).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc).a_stride(19);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
36534
// Sweeps k over 9..15, n over 1..4 and m over 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36553
// Sweeps k over 16..80 (step 8) at m=2, n=4.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
36567
// Sweeps k over 16..80 (step 8) at m=2, n=4 with a_stride(83).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc).a_stride(83);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
36582
// Sweeps k over 16..80 (step 8), n over 1..4 and m over 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36601
// Sweeps n over 5..7 and k over 1..40 (step 9) at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
36617
// Sweeps n over 5..7 and k over 1..40 (step 9) at m=2 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc).cn_stride(7);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
36634
// Sweeps n over 5..7 and k over 1..40 (step 9) at m=2 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc).a_stride(43);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
36651
// Sweeps n over 5..7, k over 1..40 (step 9) and m over 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36670
// Sweeps n over {8, 12} and k over 1..40 (step 9) at m=2.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
36686
// Sweeps n over {8, 12} and k over 1..40 (step 9) at m=2 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc).cn_stride(7);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
36703
// Sweeps n over {8, 12} and k over 1..40 (step 9) at m=2 with a_stride(43).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc).a_stride(43);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
36720
// Sweeps n over {8, 12}, k over 1..40 (step 9) and m over 1..2, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36739
// Sweeps k over 1..40 (step 9), n over 1..4 and m over 1..2 with cm_stride(7), one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(7).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36759
// Single run at m=2, n=4, k=8 with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).qmin(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
36772
// Single run at m=2, n=4, k=8 with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).qmax(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
36785
// Single run at m=2, n=4, k=8 with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).cm_stride(7);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
36798 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
36799
36800
36801 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Baseline: single run at m=4, n=4, k=8 with no stride or clamp setters applied.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
36813
// Single run at m=4, n=4, k=8 with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).cn_stride(7);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
36826
// Single run at m=4, n=4, k=8 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_strided_a) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8).a_stride(11);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
}
36839
// Sweeps n over 1..4 and m over 1..4 at k=8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(mc).n(nc).k(8).iterations(1);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
    }
  }
}
36856
// Sweeps m over 1..4 at n=4, k=8, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(mc).n(4).k(8).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, xnn_init_qc8_conv_minmax_fp32_wasmsimd_params, xnn_qs8_requantize_fp32);
  }
}
36871
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // n swept over 1..4 with m = 4 and k = 8; one iteration per value.
  for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(n_cur).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
36886
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // k swept over 1..7 with m = 4 and n = 4.
  for (size_t k_cur = 1; k_cur < 8; ++k_cur) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_cur);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
36900
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_strided_a) {
  // k swept over 1..7 with m = 4, n = 4 and a_stride set to 11.
  for (size_t k_cur = 1; k_cur < 8; ++k_cur) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_cur);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
36915
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // k in 1..7 crossed with every (m, n) pair in 1..4 x 1..4; one iteration per combination.
  for (size_t k_cur = 1; k_cur < 8; ++k_cur) {
    for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 4; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36934
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // k swept over 9..15 with m = 4 and n = 4.
  for (size_t k_cur = 9; k_cur < 16; ++k_cur) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_cur);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
36948
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_strided_a) {
  // k swept over 9..15 with m = 4, n = 4 and a_stride set to 19.
  for (size_t k_cur = 9; k_cur < 16; ++k_cur) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_cur);
    tester.a_stride(19);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
36963
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // k in 9..15 crossed with every (m, n) pair in 1..4 x 1..4; one iteration per combination.
  for (size_t k_cur = 9; k_cur < 16; ++k_cur) {
    for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 4; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36982
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // k swept over 16, 24, ..., 80 (step 8) with m = 4 and n = 4.
  for (size_t k_cur = 16; k_cur <= 80; k_cur += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_cur);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
36996
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_strided_a) {
  // k swept over 16, 24, ..., 80 (step 8) with m = 4, n = 4 and a_stride set to 83.
  for (size_t k_cur = 16; k_cur <= 80; k_cur += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_cur);
    tester.a_stride(83);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
37011
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // k in 16, 24, ..., 80 crossed with every (m, n) pair in 1..4 x 1..4; one iteration each.
  for (size_t k_cur = 16; k_cur <= 80; k_cur += 8) {
    for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 4; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37030
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // n in 5..7 crossed with k in 1, 10, ..., 37 (step 9) at m = 4.
  for (uint32_t n_cur = 5; n_cur < 8; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_cur).k(k_cur);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37046
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // n in 5..7 crossed with k in 1, 10, ..., 37 (step 9) at m = 4, cn_stride set to 7.
  for (uint32_t n_cur = 5; n_cur < 8; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_cur).k(k_cur);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37063
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_a) {
  // n in 5..7 crossed with k in 1, 10, ..., 37 (step 9) at m = 4, a_stride set to 43.
  for (uint32_t n_cur = 5; n_cur < 8; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_cur).k(k_cur);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37080
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // n in 5..7, k in 1, 10, ..., 37 (step 9), m in 1..4; one iteration per combination.
  for (uint32_t n_cur = 5; n_cur < 8; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      for (uint32_t m_cur = 1; m_cur <= 4; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37099
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  // n in {8, 12} crossed with k in 1, 10, ..., 37 (step 9) at m = 4.
  for (uint32_t n_cur = 8; n_cur <= 12; n_cur += 4) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_cur).k(k_cur);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37115
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  // n in {8, 12} crossed with k in 1, 10, ..., 37 (step 9) at m = 4, cn_stride set to 7.
  for (uint32_t n_cur = 8; n_cur <= 12; n_cur += 4) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_cur).k(k_cur);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37132
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_a) {
  // n in {8, 12} crossed with k in 1, 10, ..., 37 (step 9) at m = 4, a_stride set to 43.
  for (uint32_t n_cur = 8; n_cur <= 12; n_cur += 4) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(n_cur).k(k_cur);
      tester.a_stride(43);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37149
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  // n in {8, 12}, k in 1, 10, ..., 37 (step 9), m in 1..4; one iteration per combination.
  for (uint32_t n_cur = 8; n_cur <= 12; n_cur += 4) {
    for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
      for (uint32_t m_cur = 1; m_cur <= 4; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37168
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  // k in 1, 10, ..., 37 (step 9) crossed with every (m, n) pair in 1..4 x 1..4,
  // cm_stride set to 7; one iteration per combination.
  for (size_t k_cur = 1; k_cur <= 40; k_cur += 9) {
    for (uint32_t n_cur = 1; n_cur <= 4; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 4; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37188
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  // Single run: m = 4, n = 4, k = 8, with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
37201
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  // Single run: m = 4, n = 4, k = 8, with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
37214
TEST(QC8_GEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  // Single run: m = 4, n = 4, k = 8, with cm_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
37227 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
37228
37229
37230 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1) {
  // Single run: m = mr = 1, n = nr = 2, k = 1.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
37242
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cn) {
  // Single run: m = 1, n = 2, k = 1, with cn_stride set to 5.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
37255
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_strided_a) {
  // Single run: m = 1, n = 2, k = 1, with a_stride set to 3.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.a_stride(3);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
37268
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile) {
  // n in 1..2 crossed with m in 1..1 (a single value) at k = 1; one iteration per pair.
  for (uint32_t n_cur = 1; n_cur <= 2; ++n_cur) {
    for (uint32_t m_cur = 1; m_cur <= 1; ++m_cur) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(m_cur).n(n_cur).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37285
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  // m swept over 1..1 (a single value) with n = 2 and k = 1; one iteration.
  for (uint32_t m_cur = 1; m_cur <= 1; ++m_cur) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(m_cur).n(2).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
37300
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  // n swept over 1..2 with m = 1 and k = 1; one iteration per value.
  for (uint32_t n_cur = 1; n_cur <= 2; ++n_cur) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(n_cur).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
37315
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1) {
  // k swept over 2..9 with m = 1 and n = 2.
  for (size_t k_cur = 2; k_cur < 10; ++k_cur) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(k_cur);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
37329
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1_strided_a) {
  // k swept over 2..9 with m = 1, n = 2 and a_stride set to 11.
  for (size_t k_cur = 2; k_cur < 10; ++k_cur) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(k_cur);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
37344
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1_subtile) {
  // k in 2..9, n in 1..2, m in 1..1 (a single value); one iteration per combination.
  for (size_t k_cur = 2; k_cur < 10; ++k_cur) {
    for (uint32_t n_cur = 1; n_cur <= 2; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 1; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37363
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2) {
  // n in 3..3 (a single value) crossed with k in {1, 3, 5} at m = 1.
  for (uint32_t n_cur = 3; n_cur < 4; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(n_cur).k(k_cur);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37379
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  // n in 3..3 (a single value) crossed with k in {1, 3, 5} at m = 1, cn_stride set to 5.
  for (uint32_t n_cur = 3; n_cur < 4; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(n_cur).k(k_cur);
      tester.cn_stride(5);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37396
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_strided_a) {
  // n in 3..3 (a single value) crossed with k in {1, 3, 5} at m = 1, a_stride set to 7.
  for (uint32_t n_cur = 3; n_cur < 4; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(n_cur).k(k_cur);
      tester.a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37413
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_subtile) {
  // n in 3..3, k in {1, 3, 5}, m in 1..1; one iteration per combination.
  for (uint32_t n_cur = 3; n_cur < 4; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
      for (uint32_t m_cur = 1; m_cur <= 1; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37432
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2) {
  // n in {4, 6} crossed with k in {1, 3, 5} at m = 1.
  for (uint32_t n_cur = 4; n_cur <= 6; n_cur += 2) {
    for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(n_cur).k(k_cur);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37448
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_strided_cn) {
  // n in {4, 6} crossed with k in {1, 3, 5} at m = 1, cn_stride set to 5.
  for (uint32_t n_cur = 4; n_cur <= 6; n_cur += 2) {
    for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(n_cur).k(k_cur);
      tester.cn_stride(5);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37465
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_strided_a) {
  // n in {4, 6} crossed with k in {1, 3, 5} at m = 1, a_stride set to 7.
  for (uint32_t n_cur = 4; n_cur <= 6; n_cur += 2) {
    for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(n_cur).k(k_cur);
      tester.a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37482
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_subtile) {
  // n in {4, 6}, k in {1, 3, 5}, m in 1..1; one iteration per combination.
  for (uint32_t n_cur = 4; n_cur <= 6; n_cur += 2) {
    for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
      for (uint32_t m_cur = 1; m_cur <= 1; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37501
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm_subtile) {
  // k in {1, 3, 5}, n in 1..2, m in 1..1, cm_stride set to 5; one iteration per combination.
  for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
    for (uint32_t n_cur = 1; n_cur <= 2; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 1; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur);
        tester.cm_stride(5).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37521
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmin) {
  // Single run: m = 1, n = 2, k = 1, with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
37534
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmax) {
  // Single run: m = 1, n = 2, k = 1, with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
37547
TEST(QC8_GEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm) {
  // Single run: m = 1, n = 2, k = 1, with cm_stride set to 5.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
37560 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
37561
37562
37563 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1) {
  // Single run: m = mr = 3, n = nr = 2, k = 1.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
37575
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cn) {
  // Single run: m = 3, n = 2, k = 1, with cn_stride set to 5.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.cn_stride(5);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
37588
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_strided_a) {
  // Single run: m = 3, n = 2, k = 1, with a_stride set to 3.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.a_stride(3);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
37601
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile) {
  // Every (m, n) pair with 1 <= m <= 3 and 1 <= n <= 2 at k = 1; one iteration per pair.
  for (uint32_t n_cur = 1; n_cur <= 2; ++n_cur) {
    for (uint32_t m_cur = 1; m_cur <= 3; ++m_cur) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(m_cur).n(n_cur).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37618
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  // m swept over 1..3 with n = 2 and k = 1; one iteration per value.
  for (uint32_t m_cur = 1; m_cur <= 3; ++m_cur) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(m_cur).n(2).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
37633
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  // n swept over 1..2 with m = 3 and k = 1; one iteration per value.
  for (uint32_t n_cur = 1; n_cur <= 2; ++n_cur) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(n_cur).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
37648
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_gt_1) {
  // k swept over 2..9 with m = 3 and n = 2.
  for (size_t k_cur = 2; k_cur < 10; ++k_cur) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k_cur);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
37662
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_gt_1_strided_a) {
  // k swept over 2..9 with m = 3, n = 2 and a_stride set to 11.
  for (size_t k_cur = 2; k_cur < 10; ++k_cur) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k_cur);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
37677
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, k_gt_1_subtile) {
  // k in 2..9, n in 1..2, m in 1..3; one iteration per combination.
  for (size_t k_cur = 2; k_cur < 10; ++k_cur) {
    for (uint32_t n_cur = 1; n_cur <= 2; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 3; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37696
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2) {
  // n in 3..3 (a single value) crossed with k in {1, 3, 5} at m = 3.
  for (uint32_t n_cur = 3; n_cur < 4; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_cur).k(k_cur);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37712
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  // n in 3..3 (a single value) crossed with k in {1, 3, 5} at m = 3, cn_stride set to 5.
  for (uint32_t n_cur = 3; n_cur < 4; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_cur).k(k_cur);
      tester.cn_stride(5);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37729
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_strided_a) {
  // n in 3..3 (a single value) crossed with k in {1, 3, 5} at m = 3, a_stride set to 7.
  for (uint32_t n_cur = 3; n_cur < 4; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_cur).k(k_cur);
      tester.a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37746
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_gt_2_subtile) {
  // n in 3..3, k in {1, 3, 5}, m in 1..3; one iteration per combination.
  for (uint32_t n_cur = 3; n_cur < 4; ++n_cur) {
    for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
      for (uint32_t m_cur = 1; m_cur <= 3; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37765
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2) {
  // n in {4, 6} crossed with k in {1, 3, 5} at m = 3.
  for (uint32_t n_cur = 4; n_cur <= 6; n_cur += 2) {
    for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_cur).k(k_cur);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37781
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_strided_cn) {
  // n in {4, 6} crossed with k in {1, 3, 5} at m = 3, cn_stride set to 5.
  for (uint32_t n_cur = 4; n_cur <= 6; n_cur += 2) {
    for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_cur).k(k_cur);
      tester.cn_stride(5);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37798
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_strided_a) {
  // n in {4, 6} crossed with k in {1, 3, 5} at m = 3, a_stride set to 7.
  for (uint32_t n_cur = 4; n_cur <= 6; n_cur += 2) {
    for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_cur).k(k_cur);
      tester.a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37815
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, n_div_2_subtile) {
  // n in {4, 6}, k in {1, 3, 5}, m in 1..3; one iteration per combination.
  for (uint32_t n_cur = 4; n_cur <= 6; n_cur += 2) {
    for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
      for (uint32_t m_cur = 1; m_cur <= 3; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37834
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cm_subtile) {
  // k in {1, 3, 5}, n in 1..2, m in 1..3, cm_stride set to 5; one iteration per combination.
  for (size_t k_cur = 1; k_cur <= 5; k_cur += 2) {
    for (uint32_t n_cur = 1; n_cur <= 2; ++n_cur) {
      for (uint32_t m_cur = 1; m_cur <= 3; ++m_cur) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_cur).n(n_cur).k(k_cur);
        tester.cm_stride(5).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37854
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, qmin) {
  // Single run: m = 3, n = 2, k = 1, with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
37867
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, qmax) {
  // Single run: m = 3, n = 2, k = 1, with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
37880
TEST(QC8_GEMM_MINMAX_FP32_3X2__WASM_FMAGIC, strided_cm) {
  // Single run: m = 3, n = 2, k = 1, with cm_stride set to 5.
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
37893 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
37894
37895
37896 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 3x4 wasm_fmagic kernel on a full 3x4 tile with k = 1; no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
37908
// 3x4 wasm_fmagic kernel on a full 3x4 tile, k = 1, with cn_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1).cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
37921
// 3x4 wasm_fmagic kernel on a full 3x4 tile, k = 1, with a_stride(3).
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1).a_stride(3);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
37934
// 3x4 wasm_fmagic kernel with k = 1: n in 1..4, m in 1..3,
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(1).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37951
// 3x4 wasm_fmagic kernel with k = 1 and n = 4: m in 1..3,
// one tester iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(m_dim).n(4).k(1).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
37966
// 3x4 wasm_fmagic kernel with k = 1 and m = 3: n in 1..4,
// one tester iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(n_dim).k(1).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
37981
// 3x4 wasm_fmagic kernel on a full 3x4 tile for k in 2..9.
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
37995
// 3x4 wasm_fmagic kernel on a full 3x4 tile for k in 2..9 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(1).sr(1);
    tester.m(3).n(4).k(k_dim).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
38010
// 3x4 wasm_fmagic kernel: k in 2..9, n in 1..4, m in 1..3,
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38029
// 3x4 wasm_fmagic kernel with m = 3: n in 5..7, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38045
// 3x4 wasm_fmagic kernel with m = 3 and cn_stride(7):
// n in 5..7, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38062
// 3x4 wasm_fmagic kernel with m = 3 and a_stride(7):
// n in 5..7, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_strided_a) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38079
// 3x4 wasm_fmagic kernel: n in 5..7, k in {1, 3, 5}, m in 1..3,
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38098
// 3x4 wasm_fmagic kernel with m = 3: n in {8, 12}, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38114
// 3x4 wasm_fmagic kernel with m = 3 and cn_stride(7):
// n in {8, 12}, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38131
// 3x4 wasm_fmagic kernel with m = 3 and a_stride(7):
// n in {8, 12}, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_strided_a) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38148
// 3x4 wasm_fmagic kernel: n in {8, 12}, k in {1, 3, 5}, m in 1..3,
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38167
// 3x4 wasm_fmagic kernel with cm_stride(7): k in {1, 3, 5}, n in 1..4,
// m in 1..3, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38187
// 3x4 wasm_fmagic kernel on a full 3x4 tile, k = 1, with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
38200
// 3x4 wasm_fmagic kernel on a full 3x4 tile, k = 1, with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
38213
// 3x4 wasm_fmagic kernel on a full 3x4 tile, k = 1, with cm_stride(7).
TEST(QC8_GEMM_MINMAX_FP32_3X4__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(1).sr(1);
  tester.m(3).n(4).k(1).cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
38226 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
38227
38228
// 1x2 scalar_imagic kernel on a full 1x2 tile with k = 1; no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
38240
// 1x2 scalar_imagic kernel on a full 1x2 tile, k = 1, with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1).cn_stride(5);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
38253
// 1x2 scalar_imagic kernel on a full 1x2 tile, k = 1, with a_stride(3).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1).a_stride(3);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
38266
// 1x2 scalar_imagic kernel with k = 1: n in 1..2, m in 1..1 (single value),
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(1).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38283
// 1x2 scalar_imagic kernel with k = 1 and n = 2: m in 1..1 (single value),
// one tester iteration.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(m_dim).n(2).k(1).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
38298
// 1x2 scalar_imagic kernel with k = 1 and m = 1: n in 1..2,
// one tester iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(n_dim).k(1).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
38313
// 1x2 scalar_imagic kernel on a full 1x2 tile for k in 2..9.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
38327
// 1x2 scalar_imagic kernel on a full 1x2 tile for k in 2..9 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(k_dim).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
38342
// 1x2 scalar_imagic kernel: k in 2..9, n in 1..2, m in 1..1 (single value),
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38361
// 1x2 scalar_imagic kernel with m = 1: n = 3 (the only value in [3, 4)),
// k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38377
// 1x2 scalar_imagic kernel with m = 1 and cn_stride(5):
// n = 3 (the only value in [3, 4)), k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim).cn_stride(5);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38394
// 1x2 scalar_imagic kernel with m = 1 and a_stride(7):
// n = 3 (the only value in [3, 4)), k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim).a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38411
// 1x2 scalar_imagic kernel: n = 3 (the only value in [3, 4)),
// k in {1, 3, 5}, m in 1..1, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38430
// 1x2 scalar_imagic kernel with m = 1: n in {4, 6}, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38446
// 1x2 scalar_imagic kernel with m = 1 and cn_stride(5):
// n in {4, 6}, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim).cn_stride(5);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38463
// 1x2 scalar_imagic kernel with m = 1 and a_stride(7):
// n in {4, 6}, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_strided_a) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(n_dim).k(k_dim).a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38480
// 1x2 scalar_imagic kernel: n in {4, 6}, k in {1, 3, 5}, m in 1..1,
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38499
// 1x2 scalar_imagic kernel with cm_stride(5): k in {1, 3, 5}, n in 1..2,
// m in 1..1, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(5).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38519
// 1x2 scalar_imagic kernel on a full 1x2 tile, k = 1, with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
38532
// 1x2 scalar_imagic kernel on a full 1x2 tile, k = 1, with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
38545
// 1x2 scalar_imagic kernel on a full 1x2 tile, k = 1, with cm_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1).cm_stride(5);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
38558
38559
// 2x2 scalar_lrintf kernel on a full 2x2 tile with k = 1; no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
38571
// 2x2 scalar_lrintf kernel on a full 2x2 tile, k = 1, with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1).cn_stride(5);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
38584
// 2x2 scalar_lrintf kernel on a full 2x2 tile, k = 1, with a_stride(3).
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1).a_stride(3);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
38597
// 2x2 scalar_lrintf kernel with k = 1: n in 1..2, m in 1..2,
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(1).iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38614
// 2x2 scalar_lrintf kernel with k = 1 and n = 2: m in 1..2,
// one tester iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(m_dim).n(2).k(1).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
38629
// 2x2 scalar_lrintf kernel with k = 1 and m = 2: n in 1..2,
// one tester iteration per value.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(n_dim).k(1).iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
38644
// 2x2 scalar_lrintf kernel on a full 2x2 tile for k in 2..9.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(2).k(k_dim);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
38658
// 2x2 scalar_lrintf kernel on a full 2x2 tile for k in 2..9 with a_stride(11).
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1);
    tester.m(2).n(2).k(k_dim).a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
        xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
38673
// 2x2 scalar_lrintf kernel: k in 2..9, n in 1..2, m in 1..2,
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38692
// 2x2 scalar_lrintf kernel with m = 2: n = 3 (the only value in [3, 4)),
// k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38708
// 2x2 scalar_lrintf kernel with m = 2 and cn_stride(5):
// n = 3 (the only value in [3, 4)), k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(n_dim).k(k_dim).cn_stride(5);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38725
// 2x2 scalar_lrintf kernel with m = 2 and a_stride(7):
// n = 3 (the only value in [3, 4)), k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_strided_a) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(n_dim).k(k_dim).a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38742
// 2x2 scalar_lrintf kernel: n = 3 (the only value in [3, 4)),
// k in {1, 3, 5}, m in 1..2, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38761
// 2x2 scalar_lrintf kernel with m = 2: n in {4, 6}, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38777
// 2x2 scalar_lrintf kernel with m = 2 and cn_stride(5):
// n in {4, 6}, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(n_dim).k(k_dim).cn_stride(5);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38794
// 2x2 scalar_lrintf kernel with m = 2 and a_stride(7):
// n in {4, 6}, k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_strided_a) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1);
      tester.m(2).n(n_dim).k(k_dim).a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
          xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38811
// 2x2 scalar_lrintf kernel: n in {4, 6}, k in {1, 3, 5}, m in 1..2,
// one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38830
// 2x2 scalar_lrintf kernel with cm_stride(5): k in {1, 3, 5}, n in 1..2,
// m in 1..2, one tester iteration per combination.
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(5).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
            xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38850
// 2x2 scalar_lrintf kernel on a full 2x2 tile, k = 1, with qmin(128).
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1).qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
38863
// 2x2 scalar_lrintf kernel on a full 2x2 tile, k = 1, with qmax(128).
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1).qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
38876
// 2x2 scalar_lrintf kernel on a full 2x2 tile, k = 1, with cm_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_2X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1);
  tester.m(2).n(2).k(1).cm_stride(5);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
      xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
38889
38890
// 3x2 scalar_fmagic kernel on a full 3x2 tile with k = 1; no stride overrides.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
38902
// 3x2 scalar_fmagic kernel on a full 3x2 tile, k = 1, with cn_stride(5).
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).cn_stride(5);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
38915
// Runs the 3x2 scalar fmagic microkernel once with m=3, n=2, k=1 and the
// tester's a_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).a_stride(3);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
38928
// Sweeps every (n, m) pair with n in [1, 2] and m in [1, 3] at k=1, using a
// fresh tester with iterations set to 1 for each pair.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(1).iterations(1);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
38945
// Sweeps m over [1, 3] with n fixed at 2 and k=1; each configuration uses a
// fresh tester with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(m_dim).n(2).k(1).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
38960
// Sweeps n over [1, 2] with m fixed at 3 and k=1; each configuration uses a
// fresh tester with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(n_dim).k(1).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
38975
// Sweeps k over [2, 9] with m=3 and n=2, building a fresh tester per k.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k_dim);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
38989
// Sweeps k over [2, 9] with m=3, n=2 and the tester's a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k_dim).a_stride(11);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
39004
// For each k in [2, 9], sweeps n over [1, 2] and m over [1, 3]; every
// combination uses a fresh tester with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39023
// Runs with n=3 (the outer loop covers only that value) and k in {1, 3, 5},
// keeping m=3.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
39039
// Runs with n=3 (the outer loop covers only that value) and k in {1, 3, 5},
// keeping m=3 and setting the tester's cn_stride to 5.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(5);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
39056
// Runs with n=3 (the outer loop covers only that value) and k in {1, 3, 5},
// keeping m=3 and setting the tester's a_stride to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_strided_a) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).a_stride(7);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
39073
// Runs with n=3 (the outer loop covers only that value), k in {1, 3, 5} and
// m in [1, 3]; each combination uses a fresh tester with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39092
// Runs with n in {4, 6} and k in {1, 3, 5}, keeping m=3.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
39108
// Runs with n in {4, 6} and k in {1, 3, 5}, keeping m=3 and setting the
// tester's cn_stride to 5.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(5);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
39125
// Runs with n in {4, 6} and k in {1, 3, 5}, keeping m=3 and setting the
// tester's a_stride to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_strided_a) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).a_stride(7);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
39142
// Runs with n in {4, 6}, k in {1, 3, 5} and m in [1, 3]; each combination
// uses a fresh tester with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39161
// For k in {1, 3, 5}, sweeps n over [1, 2] and m over [1, 3] with the
// tester's cm_stride set to 5 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(5).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39181
// Runs the 3x2 scalar fmagic microkernel once with m=3, n=2, k=1 and the
// tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).qmin(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
39194
// Runs the 3x2 scalar fmagic microkernel once with m=3, n=2, k=1 and the
// tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).qmax(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
39207
// Runs the 3x2 scalar fmagic microkernel once with m=3, n=2, k=1 and the
// tester's cm_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).cm_stride(5);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
39220
39221
// Runs the 3x2 scalar imagic microkernel once with m=3, n=2, k=1; no stride,
// qmin/qmax or iteration overrides are applied.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
39233
// Runs the 3x2 scalar imagic microkernel once with m=3, n=2, k=1 and the
// tester's cn_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).cn_stride(5);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
39246
// Runs the 3x2 scalar imagic microkernel once with m=3, n=2, k=1 and the
// tester's a_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).a_stride(3);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
39259
// Sweeps every (n, m) pair with n in [1, 2] and m in [1, 3] at k=1, using a
// fresh tester with iterations set to 1 for each pair.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(1).iterations(1);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
39276
// Sweeps m over [1, 3] with n fixed at 2 and k=1; each configuration uses a
// fresh tester with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(m_dim).n(2).k(1).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
39291
// Sweeps n over [1, 2] with m fixed at 3 and k=1; each configuration uses a
// fresh tester with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(n_dim).k(1).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
39306
// Sweeps k over [2, 9] with m=3 and n=2, building a fresh tester per k.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k_dim);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
39320
// Sweeps k over [2, 9] with m=3, n=2 and the tester's a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k_dim).a_stride(11);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
39335
// For each k in [2, 9], sweeps n over [1, 2] and m over [1, 3]; every
// combination uses a fresh tester with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39354
// Runs with n=3 (the outer loop covers only that value) and k in {1, 3, 5},
// keeping m=3.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
39370
// Runs with n=3 (the outer loop covers only that value) and k in {1, 3, 5},
// keeping m=3 and setting the tester's cn_stride to 5.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(5);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
39387
// Runs with n=3 (the outer loop covers only that value) and k in {1, 3, 5},
// keeping m=3 and setting the tester's a_stride to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).a_stride(7);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
39404
// Runs with n=3 (the outer loop covers only that value), k in {1, 3, 5} and
// m in [1, 3]; each combination uses a fresh tester with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39423
// Runs with n in {4, 6} and k in {1, 3, 5}, keeping m=3.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
39439
// Runs with n in {4, 6} and k in {1, 3, 5}, keeping m=3 and setting the
// tester's cn_stride to 5.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(5);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
39456
// Runs with n in {4, 6} and k in {1, 3, 5}, keeping m=3 and setting the
// tester's a_stride to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_strided_a) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).a_stride(7);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
39473
// Runs with n in {4, 6}, k in {1, 3, 5} and m in [1, 3]; each combination
// uses a fresh tester with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39492
// For k in {1, 3, 5}, sweeps n over [1, 2] and m over [1, 3] with the
// tester's cm_stride set to 5 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(5).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39512
// Runs the 3x2 scalar imagic microkernel once with m=3, n=2, k=1 and the
// tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).qmin(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
39525
// Runs the 3x2 scalar imagic microkernel once with m=3, n=2, k=1 and the
// tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).qmax(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
39538
// Runs the 3x2 scalar imagic microkernel once with m=3, n=2, k=1 and the
// tester's cm_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).cm_stride(5);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
39551
39552
// Runs the 3x2 scalar lrintf microkernel once with m=3, n=2, k=1; no stride,
// qmin/qmax or iteration overrides are applied.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
39564
// Runs the 3x2 scalar lrintf microkernel once with m=3, n=2, k=1 and the
// tester's cn_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).cn_stride(5);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
39577
// Runs the 3x2 scalar lrintf microkernel once with m=3, n=2, k=1 and the
// tester's a_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_strided_a) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).a_stride(3);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
39590
// Sweeps every (n, m) pair with n in [1, 2] and m in [1, 3] at k=1, using a
// fresh tester with iterations set to 1 for each pair.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(1).iterations(1);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
39607
// Sweeps m over [1, 3] with n fixed at 2 and k=1; each configuration uses a
// fresh tester with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(m_dim).n(2).k(1).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
39622
// Sweeps n over [1, 2] with m fixed at 3 and k=1; each configuration uses a
// fresh tester with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(n_dim).k(1).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
39637
// Sweeps k over [2, 9] with m=3 and n=2, building a fresh tester per k.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k_dim);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
39651
// Sweeps k over [2, 9] with m=3, n=2 and the tester's a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(2).kr(1).sr(1);
    tester.m(3).n(2).k(k_dim).a_stride(11);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
  }
}
39666
// For each k in [2, 9], sweeps n over [1, 2] and m over [1, 3]; every
// combination uses a fresh tester with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39685
// Runs with n=3 (the outer loop covers only that value) and k in {1, 3, 5},
// keeping m=3.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
39701
// Runs with n=3 (the outer loop covers only that value) and k in {1, 3, 5},
// keeping m=3 and setting the tester's cn_stride to 5.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(5);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
39718
// Runs with n=3 (the outer loop covers only that value) and k in {1, 3, 5},
// keeping m=3 and setting the tester's a_stride to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_a) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).a_stride(7);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
39735
// Runs with n=3 (the outer loop covers only that value), k in {1, 3, 5} and
// m in [1, 3]; each combination uses a fresh tester with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39754
// Runs with n in {4, 6} and k in {1, 3, 5}, keeping m=3.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
39770
// Runs with n in {4, 6} and k in {1, 3, 5}, keeping m=3 and setting the
// tester's cn_stride to 5.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).cn_stride(5);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
39787
// Runs with n in {4, 6} and k in {1, 3, 5}, keeping m=3 and setting the
// tester's a_stride to 7.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_a) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(2).kr(1).sr(1);
      tester.m(3).n(n_dim).k(k_dim).a_stride(7);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
    }
  }
}
39804
// Runs with n in {4, 6}, k in {1, 3, 5} and m in [1, 3]; each combination
// uses a fresh tester with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39823
// For k in {1, 3, 5}, sweeps n over [1, 2] and m over [1, 3] with the
// tester's cm_stride set to 5 and iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(2).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(5).iterations(1);
        tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
39843
// Runs the 3x2 scalar lrintf microkernel once with m=3, n=2, k=1 and the
// tester's qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).qmin(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
39856
// Runs the 3x2 scalar lrintf microkernel once with m=3, n=2, k=1 and the
// tester's qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).qmax(128);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
39869
// Runs the 3x2 scalar lrintf microkernel once with m=3, n=2, k=1 and the
// tester's cm_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(2).kr(1).sr(1);
  tester.m(3).n(2).k(1).cm_stride(5);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, xnn_init_qc8_conv_minmax_fp32_scalar_lrintf_params, xnn_qs8_requantize_fp32);
}
39882
39883
// Runs the 4x2 scalar imagic microkernel once with m=4, n=2, k=1; no stride,
// qmin/qmax or iteration overrides are applied.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
39895
// Runs the 4x2 scalar imagic microkernel once with m=4, n=2, k=1 and the
// tester's cn_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1).cn_stride(5);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
39908
// Runs the 4x2 scalar imagic microkernel once with m=4, n=2, k=1 and the
// tester's a_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1).a_stride(3);
  tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
39921
// Sweeps every (n, m) pair with n in [1, 2] and m in [1, 4] at k=1, using a
// fresh tester with iterations set to 1 for each pair.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(1).iterations(1);
      tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
39938
// Sweeps m over [1, 4] with n fixed at 2 and k=1; each configuration uses a
// fresh tester with iterations set to 1.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(m_dim).n(2).k(1).iterations(1);
    tester.Test(xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
39953
// Sweeps n over [1, 2] with m=4 and k=1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(cur_n).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
39968
// Sweeps k over [2, 9] with m=4 and n=2.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(cur_k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
39982
// Sweeps k over [2, 9] with m=4, n=2 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_strided_a) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(cur_k);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
39997
// Sweeps k over [2, 9], n over [1, 2] and m over [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40016
// Runs n=3 (the only value in [3, 4)) with m=4 and k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40032
// Runs n=3 with m=4, k in {1, 3, 5} and cn_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.cn_stride(5);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40049
// Runs n=3 with m=4, k in {1, 3, 5} and a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_a) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40066
// Runs n=3 with k in {1, 3, 5} and m over [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t cur_n = 3; cur_n < 4; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40085
// Runs n in {4, 6} with m=4 and k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40101
// Runs n in {4, 6} with m=4, k in {1, 3, 5} and cn_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.cn_stride(5);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40118
// Runs n in {4, 6} with m=4, k in {1, 3, 5} and a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_a) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40135
// Runs n in {4, 6}, k in {1, 3, 5} and m over [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t cur_n = 4; cur_n <= 6; cur_n += 2) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40154
// Runs k in {1, 3, 5}, n over [1, 2] and m over [1, 4] with cm_stride set to 5,
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 2; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.cm_stride(5).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40174
// Runs the 4x2 scalar_imagic ukernel with m=4, n=2, k=1 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
40187
// Runs the 4x2 scalar_imagic ukernel with m=4, n=2, k=1 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
40200
// Runs the 4x2 scalar_imagic ukernel with m=4, n=2, k=1 and cm_stride set to 5.
TEST(QC8_GEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.cm_stride(5);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
40213
40214
// Runs the 4x4 scalar_imagic ukernel with m=4, n=4 and k=1.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
40226
// Runs the 4x4 scalar_imagic ukernel with m=4, n=4, k=1 and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
40239
// Runs the 4x4 scalar_imagic ukernel with m=4, n=4, k=1 and a_stride set to 3.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_strided_a) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.a_stride(3);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
40252
// Sweeps every (m, n) with m in [1, 4] and n in [1, 4] at k=1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(cur_m).n(cur_n).k(1);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40269
// Sweeps m over [1, 4] with n=4 and k=1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(cur_m).n(4).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
40284
// Sweeps n over [1, 4] with m=4 and k=1, one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(cur_n).k(1);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
40299
// Sweeps k over [2, 9] with m=4 and n=4.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(cur_k);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
40313
// Sweeps k over [2, 9] with m=4, n=4 and a_stride set to 11.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_strided_a) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(cur_k);
    tester.a_stride(11);
    tester.Test(
        xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
40328
// Sweeps k over [2, 9], n over [1, 4] and m over [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t cur_k = 2; cur_k < 10; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40347
// Runs n over [5, 7] with m=4 and k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40363
// Runs n over [5, 7] with m=4, k in {1, 3, 5} and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40380
// Runs n over [5, 7] with m=4, k in {1, 3, 5} and a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_strided_a) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40397
// Runs n over [5, 7], k in {1, 3, 5} and m over [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40416
// Runs n in {8, 12} with m=4 and k in {1, 3, 5}.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40432
// Runs n in {8, 12} with m=4, k in {1, 3, 5} and cn_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40449
// Runs n in {8, 12} with m=4, k in {1, 3, 5} and a_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_strided_a) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.a_stride(7);
      tester.Test(
          xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40466
// Runs n in {8, 12}, k in {1, 3, 5} and m over [1, 4], one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40485
// Runs k in {1, 3, 5}, n over [1, 4] and m over [1, 4] with cm_stride set to 7,
// one iteration each.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t cur_k = 1; cur_k <= 5; cur_k += 2) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40505
// Runs the 4x4 scalar_imagic ukernel with m=4, n=4, k=1 and qmin set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
40518
// Runs the 4x4 scalar_imagic ukernel with m=4, n=4, k=1 and qmax set to 128.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
40531
// Runs the 4x4 scalar_imagic ukernel with m=4, n=4, k=1 and cm_stride set to 7.
TEST(QC8_GEMM_MINMAX_FP32_4X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
40544