1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/qc8-igemm-minmax-fp32.yaml
11 // Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: m = mr = 4, n = nr = 8, k = 8.
// The kernel, its params initializer and xnn_qs8_requantize_fp32 are handed to Test().
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
40
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: m = 4, n = 8, k = 8 with
// cn_stride set to 11 (greater than nr = 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
54
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: k = 8 for every (m, n) with
// 1 <= m <= 4 and 1 <= n <= 8; iterations is set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
72
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: k = 8, n = 8, sweeping m over
// 1..4; iterations is set to 1 for each m.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
88
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: k = 8, m = 4, sweeping n over
// 1..8; iterations is set to 1 for each n.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
104
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: m = 4, n = 8, sweeping k over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
119
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: k over 1..7 combined with every
// (m, n), 1 <= m <= 4 and 1 <= n <= 8; iterations is set to 1 per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
139
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: m = 4, n = 8, sweeping k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
154
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: k over 9..15 combined with every
// (m, n), 1 <= m <= 4 and 1 <= n <= 8; iterations is set to 1 per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
174
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: m = 4, n = 8, with k stepping
// through the multiples of 8 from 16 to 80 inclusive.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
189
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: k = 16, 24, ..., 80 combined with
// every (m, n), 1 <= m <= 4 and 1 <= n <= 8; iterations is set to 1 per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
209
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: m = 4, n over 9..15 (above nr = 8),
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
226
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: m = 4, n over 9..15,
// k in {1, 10, 19, 28, 37}, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
244
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: n over 9..15, k in
// {1, 10, 19, 28, 37}, m over 1..4; iterations is set to 1 per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
264
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: m = 4, n in {16, 24} (multiples of
// nr = 8), k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
281
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: m = 4, n in {16, 24},
// k in {1, 10, 19, 28, 37}, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
299
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: n in {16, 24}, k in
// {1, 10, 19, 28, 37}, m over 1..4; iterations is set to 1 per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
319
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: m = 4, n = 8, ks = 3,
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
335
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: ks = 3, k in {1, 10, 19, 28, 37},
// every (m, n) with 1 <= m <= 4 and 1 <= n <= 8; iterations is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
356
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: m = 4, n over 9..15, ks = 3,
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
374
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: m = 4, n in {16, 24}, ks = 3,
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
392
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: cm_stride = 11, k in
// {1, 10, 19, 28, 37}, every (m, n) with 1 <= m <= 4 and 1 <= n <= 8;
// iterations is set to 1 per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
413
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: m = 4, n = 8, ks = 3,
// a_offset = 163, k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
430
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: m = 4, n = 8, ks = 3,
// a_offset = 163, k in {1, 10, 19, 28, 37}, with zero_index swept over 0..3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
450
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: m = 4, n = 8, k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
464
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: m = 4, n = 8, k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
478
// aarch32_neon_mlal_lane_prfm_ld64 4x8 kernel: m = 4, n = 8, k = 8 with
// cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
492 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
493
494
495 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: m = mr = 4, n = nr = 8, k = 8.
// The kernel, its params initializer and xnn_qs8_requantize_fp32 are handed to Test().
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
508
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: m = 4, n = 8, k = 8 with
// cn_stride set to 11 (greater than nr = 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
522
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: k = 8 for every (m, n) with
// 1 <= m <= 4 and 1 <= n <= 8; iterations is set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
540
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: k = 8, n = 8, sweeping m over
// 1..4; iterations is set to 1 for each m.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
556
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: k = 8, m = 4, sweeping n over
// 1..8; iterations is set to 1 for each n.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
572
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: m = 4, n = 8, sweeping k over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
587
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: k over 1..7 combined with every
// (m, n), 1 <= m <= 4 and 1 <= n <= 8; iterations is set to 1 per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
607
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: m = 4, n = 8, sweeping k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
622
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: k over 9..15 combined with every
// (m, n), 1 <= m <= 4 and 1 <= n <= 8; iterations is set to 1 per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
642
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: m = 4, n = 8, with k stepping
// through the multiples of 8 from 16 to 80 inclusive.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
657
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: k = 16, 24, ..., 80 combined with
// every (m, n), 1 <= m <= 4 and 1 <= n <= 8; iterations is set to 1 per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
677
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: m = 4, n over 9..15 (above
// nr = 8), k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
694
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: m = 4, n over 9..15,
// k in {1, 10, 19, 28, 37}, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
712
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: n over 9..15, k in
// {1, 10, 19, 28, 37}, m over 1..4; iterations is set to 1 per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
732
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: m = 4, n in {16, 24} (multiples
// of nr = 8), k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
749
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: m = 4, n in {16, 24},
// k in {1, 10, 19, 28, 37}, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
767
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: n in {16, 24}, k in
// {1, 10, 19, 28, 37}, m over 1..4; iterations is set to 1 per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
787
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: m = 4, n = 8, ks = 3,
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
803
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: ks = 3, k in {1, 10, 19, 28, 37},
// every (m, n) with 1 <= m <= 4 and 1 <= n <= 8; iterations is set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
824
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: m = 4, n over 9..15, ks = 3,
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
842
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: m = 4, n in {16, 24}, ks = 3,
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
860
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: cm_stride = 11, k in
// {1, 10, 19, 28, 37}, every (m, n) with 1 <= m <= 4 and 1 <= n <= 8;
// iterations is set to 1 per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
881
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: m = 4, n = 8, ks = 3,
// a_offset = 163, k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
898
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: m = 4, n = 8, ks = 3,
// a_offset = 163, k in {1, 10, 19, 28, 37}, with zero_index swept over 0..3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
918
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: m = 4, n = 8, k = 8 with
// qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
932
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: m = 4, n = 8, k = 8 with
// qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
946
// aarch32_neonv8_mlal_lane_cortex_a53 4x8 kernel: m = 4, n = 8, k = 8 with
// cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_cortex_a53, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
960 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
961
962
963 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// aarch32_neonv8_mlal_lane_ld64 4x8 kernel: m = mr = 4, n = nr = 8, k = 8.
// The kernel, its params initializer and xnn_qs8_requantize_fp32 are handed to Test().
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
976
// aarch32_neonv8_mlal_lane_ld64 4x8 kernel: m = 4, n = 8, k = 8 with
// cn_stride set to 11 (greater than nr = 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(8)
    .cn_stride(11)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
990
// aarch32_neonv8_mlal_lane_ld64 4x8 kernel: k = 8 for every (m, n) with
// 1 <= m <= 4 and 1 <= n <= 8; iterations is set to 1 for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
1008
// Sweeps m in [1, 4] with n = 8 and k = 8, iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m_size).n(8).k(8)
        .iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1024
// Sweeps n in [1, 8] with m = 4 and k = 8, iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n_size).k(8)
        .iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1040
// Sweeps k in [1, 8) with m = 4 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1055
// Sweeps k in [1, 8), then n in [1, 8], then m in [1, 4], iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1075
// Sweeps k in [9, 16) with m = 4 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1090
// Sweeps k in [9, 16), then n in [1, 8], then m in [1, 4], iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1110
// Sweeps k over 16, 24, ..., 80 with m = 4 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1125
// Sweeps k over 16, 24, ..., 80, then n in [1, 8], then m in [1, 4], iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1145
// Sweeps n in [9, 16) (outer) and k over 1, 10, ..., 37 (inner) with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1162
// Sweeps n in [9, 16) and k over 1, 10, ..., 37 with m = 4 and cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1180
// Sweeps n in [9, 16), then k over 1, 10, ..., 37, then m in [1, 4], iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1200
// Sweeps n over 16 and 24 (outer) and k over 1, 10, ..., 37 (inner) with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1217
// Sweeps n over 16 and 24 and k over 1, 10, ..., 37 with m = 4 and cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1235
// Sweeps n over 16 and 24, then k over 1, 10, ..., 37, then m in [1, 4], iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1255
// Sweeps k over 1, 10, ..., 37 with m = 4, n = 8 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_size)
        .ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1271
// Sweeps k over 1, 10, ..., 37, then n in [1, 8], then m in [1, 4], with ks = 3 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1292
// Sweeps n in [9, 16) and k over 1, 10, ..., 37 with m = 4 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1310
// Sweeps n over 16 and 24 and k over 1, 10, ..., 37 with m = 4 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1328
// Sweeps k over 1, 10, ..., 37, then n in [1, 8], then m in [1, 4], with cm_stride = 11 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1349
// Sweeps k over 1, 10, ..., 37 with m = 4, n = 8, ks = 3 and a_offset = 163.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k_size)
        .ks(3)
        .a_offset(163);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1366
// Sweeps k over 1, 10, ..., 37 and zero_index in [0, 4) with ks = 3 and a_offset = 163.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(k_size)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1386
// m = 4, n = 8, k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1400
// m = 4, n = 8, k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1414
// m = 4, n = 8, k = 8 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__AARCH32_NEONV8_MLAL_LANE_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x8__aarch32_neonv8_mlal_lane_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1428 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1429
1430
1431 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m = 1, n = 8, k = 16 and no extra tester options.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1444
// m = 1, n = 8, k = 16 with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1458
// Sweeps n in [1, 8] (outer) and m in [1, 1] (inner) at k = 16, iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(16)
          .iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1476
// Sweeps m in [1, 1] with n = 8 and k = 16, iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(m_size).n(8).k(16)
        .iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1492
// Sweeps n in [1, 8] with m = 1 and k = 16, iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(16)
        .iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1508
// Sweeps k in [1, 16) with m = 1 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1523
// Sweeps k in [1, 16), then n in [1, 8], then m in [1, 1], iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1543
// Sweeps k in [17, 32) with m = 1 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1558
// Sweeps k in [17, 32), then n in [1, 8], then m in [1, 1], iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1578
// Sweeps k over 32, 48, ..., 160 with m = 1 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1593
// Sweeps k over 32, 48, ..., 160, then n in [1, 8], then m in [1, 1], iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1613
// Sweeps n in [9, 16) (outer) and k over 1, 18, ..., 69 (inner) with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1630
// Sweeps n in [9, 16) and k over 1, 18, ..., 69 with m = 1 and cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1648
// Sweeps n in [9, 16), then k over 1, 18, ..., 69, then m in [1, 1], iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1668
// Sweeps n over 16 and 24 (outer) and k over 1, 18, ..., 69 (inner) with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1685
// Sweeps n over 16 and 24 and k over 1, 18, ..., 69 with m = 1 and cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1703
// Sweeps n over 16 and 24, then k over 1, 18, ..., 69, then m in [1, 1], iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1723
// Sweeps k over 1, 18, ..., 69 with m = 1, n = 8 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1739
// Sweeps k over 1, 18, ..., 69, then n in [1, 8], then m in [1, 1], with ks = 3 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1760
// Sweeps n in [9, 16) and k over 1, 18, ..., 69 with m = 1 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1778
// Sweeps n over 16 and 24 and k over 1, 18, ..., 69 with m = 1 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1796
// Sweeps k over 1, 18, ..., 69, then n in [1, 8], then m in [1, 1], with cm_stride = 11 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
1817
// Sweeps k over 1, 18, ..., 69 with m = 1, n = 8, ks = 3 and a_offset = 83.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(83);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1834
// Sweeps k over 1, 18, ..., 69 and zero_index in [0, 1) with ks = 3 and a_offset = 83.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(8).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1854
// m = 1, n = 8, k = 16 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1868
// m = 1, n = 8, k = 16 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1882
// m = 1, n = 8, k = 16 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1896 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
1897
1898
1899 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m = 1, n = 8, k = 16 and no extra tester options.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1912
// m = 1, n = 8, k = 16 with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
1926
// Sweeps n in [1, 8] (outer) and m in [1, 1] (inner) at k = 16, iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(16)
          .iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
1944
// Sweeps m in [1, 1] with n = 8 and k = 16, iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(m_size).n(8).k(16)
        .iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1960
// Sweeps n in [1, 8] with m = 1 and k = 16, iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(n_size).k(16)
        .iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1976
// Sweeps k in [1, 16) with m = 1 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
1991
// Sweeps k in [1, 16), then n in [1, 8], then m in [1, 1], iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2011
// Runs the 1x8c8 Cortex-A53 kernel with m = 1, n = 8 for every k in [17, 31].
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(cur_k)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2026
// For every k in [17, 31], sweeps n over 1..8 and m over 1..1 (single pass,
// mr is 1), one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2046
// Runs the 1x8c8 Cortex-A53 kernel with m = 1, n = 8 for k = 32, 48, ..., 160
// (multiples of 16).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(cur_k)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2061
// For k = 32, 48, ..., 160, sweeps n over 1..8 and m over 1..1 (single pass,
// mr is 1), one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2081
// Runs the 1x8c8 Cortex-A53 kernel with n in [9, 15] (above nr = 8) and
// k = 1, 18, 35, 52, 69; m stays at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2098
// Same sweep as n in [9, 15] x k = 1, 18, ..., 69 with m = 1, but with
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2116
// For n in [9, 15] and k = 1, 18, ..., 69, sweeps m over 1..1 (single pass,
// mr is 1) with one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2136
// Runs the 1x8c8 Cortex-A53 kernel with n = 16 and 24 (multiples of nr = 8)
// and k = 1, 18, 35, 52, 69; m stays at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2153
// n = 16 and 24 with k = 1, 18, ..., 69 and m = 1, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2171
// For n = 16 and 24 and k = 1, 18, ..., 69, sweeps m over 1..1 (single pass,
// mr is 1) with one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2191
// Runs the 1x8c8 Cortex-A53 kernel with ks set to 3, m = 1, n = 8 and
// k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(cur_k)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2207
// With ks set to 3 and k = 1, 18, ..., 69, sweeps n over 1..8 and m over 1..1
// (single pass, mr is 1), one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2228
// n in [9, 15] and k = 1, 18, ..., 69 with m = 1 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2246
// n = 16 and 24 and k = 1, 18, ..., 69 with m = 1 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2264
// With cm_stride set to 11 and k = 1, 18, ..., 69, sweeps n over 1..8 and m
// over 1..1 (single pass, mr is 1), one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2285
// Runs the 1x8c8 Cortex-A53 kernel with ks set to 3 and a_offset set to 83,
// m = 1, n = 8, for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(cur_k)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2302
// For k = 1, 18, ..., 69, runs with ks = 3 and a_offset = 83 while
// zero_index takes every value in [0, mr); with mr = 1 that is only index 0.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(8).k(cur_k)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2322
// Single run at m = 1, n = 8, k = 16 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2336
// Single run at m = 1, n = 8, k = 16 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2350
// Single run at m = 1, n = 8, k = 16 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2364 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2365
2366
2367 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run of the 2x8c8 PRFM kernel at m = mr (2), n = nr (8), k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2380
// Single run at m = 2, n = 8, k = 16 with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2394
// At k = 16, sweeps n over 1..8 and m over 1..2, one tester iteration per
// (n, m) combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(16)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2412
// At k = 16 with n fixed at 8, sweeps m over 1..2 with one tester iteration
// each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(cur_m).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2428
// At k = 16 with m fixed at 2, sweeps n over 1..8 with one tester iteration
// each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cur_n).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2444
// Runs the 2x8c8 PRFM kernel with m = 2, n = 8 for every k in [1, 15].
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(cur_k)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2459
// For every k in [1, 15], sweeps n over 1..8 and m over 1..2, one tester
// iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2479
// Runs the 2x8c8 PRFM kernel with m = 2, n = 8 for every k in [17, 31].
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(cur_k)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2494
// For every k in [17, 31], sweeps n over 1..8 and m over 1..2, one tester
// iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2514
// Runs the 2x8c8 PRFM kernel with m = 2, n = 8 for k = 32, 48, ..., 160
// (multiples of 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(cur_k)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2529
// For k = 32, 48, ..., 160, sweeps n over 1..8 and m over 1..2, one tester
// iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2549
// Runs the 2x8c8 PRFM kernel with n in [9, 15] (above nr = 8) and
// k = 1, 18, 35, 52, 69; m stays at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2566
// n in [9, 15] and k = 1, 18, ..., 69 with m = 2 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2584
// For n in [9, 15] and k = 1, 18, ..., 69, sweeps m over 1..2 with one tester
// iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2604
// Runs the 2x8c8 PRFM kernel with n = 16 and 24 (multiples of nr = 8) and
// k = 1, 18, 35, 52, 69; m stays at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2621
// n = 16 and 24 and k = 1, 18, ..., 69 with m = 2 and cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2639
// For n = 16 and 24 and k = 1, 18, ..., 69, sweeps m over 1..2 with one
// tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2659
// Runs the 2x8c8 PRFM kernel with ks set to 3, m = 2, n = 8 and
// k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(cur_k)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2675
// With ks set to 3 and k = 1, 18, ..., 69, sweeps n over 1..8 and m over
// 1..2, one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2696
// n in [9, 15] and k = 1, 18, ..., 69 with m = 2 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2714
// n = 16 and 24 and k = 1, 18, ..., 69 with m = 2 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2732
// With cm_stride set to 11 and k = 1, 18, ..., 69, sweeps n over 1..8 and m
// over 1..2, one tester iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2753
// Runs the 2x8c8 PRFM kernel with ks set to 3 and a_offset set to 163,
// m = 2, n = 8, for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(cur_k)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2770
// For k = 1, 18, ..., 69, runs with ks = 3 and a_offset = 163 while
// zero_index takes each value in [0, mr), i.e. 0 and 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(2).n(8).k(cur_k)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2790
// Single run at m = 2, n = 8, k = 16 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2804
// Single run at m = 2, n = 8, k = 16 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2818
// Single run at m = 2, n = 8, k = 16 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2832 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
2833
2834
2835 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run of the 2x8c8 PRFM Cortex-A53 kernel at m = mr (2), n = nr (8),
// k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2848
// Single run at m = 2, n = 8, k = 16 with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
2862
// At k = 16, sweeps n over 1..8 and m over 1..2, one tester iteration per
// (n, m) combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(16)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2880
// At k = 16 with n fixed at 8, sweeps m over 1..2 with one tester iteration
// each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(cur_m).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2896
// At k = 16 with m fixed at 2, sweeps n over 1..8 with one tester iteration
// each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cur_n).k(16)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2912
// Runs the 2x8c8 PRFM Cortex-A53 kernel with m = 2, n = 8 for every k in
// [1, 15].
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(cur_k)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2927
// For every k in [1, 15], sweeps n over 1..8 and m over 1..2, one tester
// iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2947
// Runs the 2x8c8 PRFM Cortex-A53 kernel with m = 2, n = 8 for every k in
// [17, 31].
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(cur_k)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2962
// For every k in [17, 31], sweeps n over 1..8 and m over 1..2, one tester
// iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2982
// Runs the 2x8c8 PRFM Cortex-A53 kernel with m = 2, n = 8 for
// k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(cur_k)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
2997
// For k = 32, 48, ..., 160, covers every (m, n) with m in 1..2 and n in 1..8; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3017
// Sweeps n over 9..15 with m fixed at 2; k takes 1, 18, 35, ... up to 80 (step 17).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3034
// Same sweep as n in 9..15 / k step 17 up to 80, with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3052
// Sweeps n over 9..15, k over 1..80 (step 17) and m over 1..2; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3072
// Runs n = 16 and n = 24 with m fixed at 2; k takes 1..80 in steps of 17.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3089
// Runs n = 16 and n = 24, k over 1..80 (step 17), with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3107
// Runs n = 16 and n = 24, k over 1..80 (step 17) and m over 1..2; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3127
// Runs the full 2x8 tile with ks set to 3 for k over 1..80 (step 17).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(depth)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3143
// With ks set to 3: k over 1..80 (step 17), n over 1..8, m over 1..2; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3164
// With ks set to 3: n over 9..15, k over 1..80 (step 17), m fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3182
// With ks set to 3: n = 16 and n = 24, k over 1..80 (step 17), m fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3200
// With cm_stride set to 11: k over 1..80 (step 17), n over 1..8, m over 1..2; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3221
// Full 2x8 tile with ks set to 3 and a_offset set to 163, for k over 1..80 (step 17).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(depth)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3238
// Like the a_offset case (ks 3, a_offset 163), additionally sweeping zero_index over 0..1 for each k.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(depth)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_row)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3258
// Single full-tile run (m 2, n 8, k 16) with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3272
// Single full-tile run (m 2, n 8, k 16) with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3286
// Single full-tile run (m 2, n 8, k 16) with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AARCH64_NEON_MLAL_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3300 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3301
3302
3303 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single full-tile run: m 4, n 16, k 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3316
// Single full-tile run (m 4, n 16, k 8) with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3330
// With k fixed at 8, covers every (m, n) with n in 1..16 and m in 1..4; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3348
// With n fixed at 16 and k at 8, sweeps m over 1..4; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(rows).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3364
// With m fixed at 4 and k at 8, sweeps n over 1..16; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3380
// Runs the full 4x16 tile for each k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3395
// Sweeps k over 1..7 and, for each k, every (m, n) with m in 1..4 and n in 1..16; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3415
// Runs the full 4x16 tile for each k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3430
// Sweeps k over 9..15 and, for each k, every (m, n) with m in 1..4 and n in 1..16; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3450
// Runs the full 4x16 tile for k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3465
// For k = 16, 24, ..., 80, covers every (m, n) with m in 1..4 and n in 1..16; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3485
// Sweeps n over 17..31 with m fixed at 4; k takes 1, 10, 19, ... up to 40 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3502
// Sweeps n over 17..31 and k over 1..40 (step 9), with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(19)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3520
// Sweeps n over 17..31, k over 1..40 (step 9) and m over 1..4; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3540
// Runs n = 32 and n = 48 with m fixed at 4; k takes 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3557
// Runs n = 32 and n = 48, k over 1..40 (step 9), with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(19)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3575
// Runs n = 32 and n = 48, k over 1..40 (step 9) and m over 1..4; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3595
// Runs the full 4x16 tile with ks set to 3 for k over 1..40 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3611
// With ks set to 3: k over 1..40 (step 9), n over 1..16, m over 1..4; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3632
// With ks set to 3: n over 17..31, k over 1..40 (step 9), m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3650
// With ks set to 3: n = 32 and n = 48, k over 1..40 (step 9), m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3668
// With cm_stride set to 19: k over 1..40 (step 9), n over 1..16, m over 1..4; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3689
// Full 4x16 tile with ks set to 3 and a_offset set to 163, for k over 1..40 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(depth)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3706
// Like the a_offset case (ks 3, a_offset 163), additionally sweeping zero_index over 0..3 for each k.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(depth)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_row)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3726
// Single full-tile run (m 4, n 16, k 8) with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3740
// Single full-tile run (m 4, n 16, k 8) with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3754
// Single full-tile run (m 4, n 16, k 8) with cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
3768 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3769
3770
3771 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single full-tile run: m 1, n 8, k 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
3784
// Single full-tile run (m 1, n 8, k 8) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(8)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
3798
// With k fixed at 8, sweeps n over 1..8; the m loop covers only m = 1. One iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3816
// With n fixed at 8 and k at 8, the m loop covers only m = 1; one iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(rows).n(8).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3832
// With m fixed at 1 and k at 8, sweeps n over 1..8; one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3848
// Runs the full 1x8 tile for each k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3863
// Sweeps k over 1..7 and n over 1..8; the m loop covers only m = 1. One iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3883
// Runs the full 1x8 tile for each k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3898
// Sweeps k over 9..15 and n over 1..8; the m loop covers only m = 1. One iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3918
// Runs the full 1x8 tile for k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(depth)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
3933
// For k = 16, 24, ..., 80, sweeps n over 1..8; the m loop covers only m = 1. One iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3953
// Sweeps n over 9..15 with m fixed at 1; k takes 1, 10, 19, ... up to 40 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3970
// Same sweep as n in [9, 15] x k = 1, 10, ..., 37 with m=1, but with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(cols).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3988
// Runs n in [9, 15], k = 1, 10, ..., 37 and m in [1, 1], each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4008
// Runs n = 16 and n = 24 (multiples of nr=8) with m=1, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(cols).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4025
// Runs n = 16 and n = 24 with m=1 and cn_stride(11), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(cols).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4043
// Runs n = 16 and n = 24, k = 1, 10, ..., 37 and m in [1, 1], each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4063
// Runs m=1, n=8 with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4079
// With ks(3) and iterations(1): k = 1, 10, ..., 37 against n in [1, 8] and m in [1, 1].
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4100
// Runs n in [9, 15] with m=1 and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(cols).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4118
// Runs n = 16 and n = 24 with m=1 and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(cols).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4136
// With cm_stride(11) and iterations(1): k = 1, 10, ..., 37 against n in [1, 8] and m in [1, 1].
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4157
// Runs m=1, n=8 with ks(3) and a_offset(43), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(kc);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4174
// Runs m=1, n=8 with ks(3), a_offset(43) and zero_index in [0, 0], for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(8).k(kc);
      tester.ks(3).a_offset(43).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4194
// Single case: m=1, n=8, k=8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4208
// Single case: m=1, n=8, k=8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4222
// Single case: m=1, n=8, k=8 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4236 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4237
4238
4239 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: m=1, n=8, k=8 with no extra options.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4252
// Single case: m=1, n=8, k=8 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4266
// With k=8 and iterations(1): every n in [1, 8] against m in [1, 1].
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4284
// With n=8, k=8 and iterations(1): m ranges over [1, 1].
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(rows).n(8).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4300
// With m=1, k=8 and iterations(1): n ranges over [1, 8].
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4316
// Runs m=1, n=8 for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4331
// With iterations(1): every k in [1, 7] against n in [1, 8] and m in [1, 1].
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4351
// Runs m=1, n=8 for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4366
// With iterations(1): every k in [9, 15] against n in [1, 8] and m in [1, 1].
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4386
// Exercises the microkernel with m=1, n=8 while k takes the values 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4401
// For k = 16, 24, ..., 80, runs every n in [1, 8] with m in [1, 1] and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4421
// Runs n in [9, 15] (above nr=8) with m=1, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(cols).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4438
// Runs n in [9, 15] with m=1 and cn_stride(11), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(cols).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4456
// Runs n in [9, 15], k = 1, 10, ..., 37 and m in [1, 1], each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4476
// Runs n = 16 and n = 24 (multiples of nr=8) with m=1, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(cols).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4493
// Runs n = 16 and n = 24 with m=1 and cn_stride(11), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(cols).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4511
// Runs n = 16 and n = 24, k = 1, 10, ..., 37 and m in [1, 1], each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4531
// Runs m=1, n=8 with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4547
// With ks(3) and iterations(1): k = 1, 10, ..., 37 against n in [1, 8] and m in [1, 1].
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4568
// Runs n in [9, 15] with m=1 and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(cols).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4586
// Runs n = 16 and n = 24 with m=1 and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(cols).k(kc);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4604
// With cm_stride(11) and iterations(1): k = 1, 10, ..., 37 against n in [1, 8] and m in [1, 1].
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4625
// Runs m=1, n=8 with ks(3) and a_offset(43), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(1).sr(1);
    tester.m(1).n(8).k(kc);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4642
// Runs m=1, n=8 with ks(3), a_offset(43) and zero_index in [0, 0], for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(1).sr(1);
      tester.m(1).n(8).k(kc);
      tester.ks(3).a_offset(43).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4662
// Single case: m=1, n=8, k=8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4676
// Single case: m=1, n=8, k=8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4690
// Single case: m=1, n=8, k=8 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(1).sr(1);
  tester.m(1).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4704 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4705
4706
4707 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: m=1, n=8, k=16 with no extra options.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4720
// Single case: m=1, n=8, k=16 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(2).sr(1);
  tester.m(1).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
4734
// With k=16 and iterations(1): every n in [1, 8] against m in [1, 1].
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(rows).n(cols).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4752
// With n=8, k=16 and iterations(1): m ranges over [1, 1].
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(rows).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4768
// With m=1, k=16 and iterations(1): n ranges over [1, 8].
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(cols).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4784
// Runs m=1, n=8 for every k in [1, 15].
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4799
// With iterations(1): every k in [1, 15] against n in [1, 8] and m in [1, 1].
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4819
// Runs m=1, n=8 for every k in [17, 31].
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4834
// With iterations(1): every k in [17, 31] against n in [1, 8] and m in [1, 1].
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4854
// Exercises the microkernel with m=1, n=8 while k takes the values 32, 48, ..., 160.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
4869
// For k = 32, 48, ..., 160, runs every n in [1, 8] with m in [1, 1] and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4889
// Runs n in [9, 15] (above nr=8) with m=1, for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(cols).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4906
// Runs n in [9, 15] with m=1 and cn_stride(11), for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(cols).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4924
// Runs n in [9, 15], k = 1, 18, ..., 69 and m in [1, 1], each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4944
// Runs n = 16 and n = 24 (multiples of nr=8) with m=1, for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(cols).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4961
// Runs n = 16 and n = 24 with m=1 and cn_stride(11), for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(2).sr(1);
      tester.m(1).n(cols).k(kc);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4979
// Runs n = 16 and n = 24, k = 1, 18, ..., 69 and m in [1, 1], each with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(kc);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4999
// Runs m=1, n=8 with ks(3) for k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(2).sr(1);
    tester.m(1).n(8).k(kc);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5015
// ks = 3 case over every sub-tile: n in [1, 8], m in [1, 1], and
// k in {1, 18, 35, 52, 69}; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5036
// ks = 3 case with n in [9, 15] (above nr = 8, below 2 * nr) and
// k in {1, 18, 35, 52, 69}; m is fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5054
// ks = 3 case with n in {16, 24} (multiples of nr = 8) and
// k in {1, 18, 35, 52, 69}; m is fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5072
// Every sub-tile (n in [1, 8], m in [1, 1]) with cm_stride set to 11, for
// k in {1, 18, 35, 52, 69}; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5093
// Full 1x8 tile with ks = 3 and a_offset = 83, for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5110
// Full 1x8 tile with ks = 3 and a_offset = 83, additionally setting
// zero_index to each value in [0, mr). With mr = 1 only index 0 is used.
// k takes the values {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_dim)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5130
// Full 1x8 tile at k = 16 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5144
// Full 1x8 tile at k = 16 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5158
// Full 1x8 tile at k = 16 with cm_stride set to 11 (larger than n = 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5172 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5173
5174
5175 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline case for the 1x8c2 neonv8 mlal_ld2r micro-kernel: full 1x8 tile
// with k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5188
// Full 1x8 tile at k = 16 with cn_stride set to 11 (larger than nr = 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5202
// k = 16 over every sub-tile: n in [1, 8] and m in [1, 1]; one iteration per
// configuration. The m loop runs once because mr = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5220
// k = 16, n = 8, varying only m over [1, 1] (a single value since mr = 1);
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(m_dim).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5236
// k = 16, m = 1, varying only n over [1, 8]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(n_dim).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5252
// Full 1x8 tile for every k in [1, 15], i.e. all values below 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5267
// Every k in [1, 15] combined with every sub-tile (n in [1, 8], m in [1, 1]);
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5287
// Full 1x8 tile for every k in [17, 31], i.e. above 16 and below 32.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5302
// Every k in [17, 31] combined with every sub-tile (n in [1, 8],
// m in [1, 1]); one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5322
// Full 1x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5337
// k = 32, 48, ..., 160 combined with every sub-tile (n in [1, 8],
// m in [1, 1]); one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5357
// m = 1 with n in [9, 15] (above nr = 8, below 2 * nr) and
// k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5374
// Same sweep as n_gt_8 (n in [9, 15], k in {1, 18, 35, 52, 69}, m = 1),
// but with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5392
// n in [9, 15] and k in {1, 18, 35, 52, 69} for every m in [1, 1];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5412
// m = 1 with n in {16, 24} (multiples of nr = 8) and
// k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5429
// Same sweep as n_div_8 (n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 1),
// but with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5447
// n in {16, 24} and k in {1, 18, 35, 52, 69} for every m in [1, 1];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5467
// Full 1x8 tile (m = 1, n = 8) with ks = 3, for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5483
// ks = 3 case over every sub-tile: n in [1, 8], m in [1, 1], and
// k in {1, 18, 35, 52, 69}; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5504
// ks = 3 case with n in [9, 15] and k in {1, 18, 35, 52, 69}; m is fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5522
// ks = 3 case with n in {16, 24} and k in {1, 18, 35, 52, 69};
// m is fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5540
// Every sub-tile (n in [1, 8], m in [1, 1]) with cm_stride set to 11, for
// k in {1, 18, 35, 52, 69}; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5561
// Full 1x8 tile with ks = 3 and a_offset = 83, for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5578
// Full 1x8 tile with ks = 3 and a_offset = 83, additionally setting
// zero_index to each value in [0, mr). With mr = 1 only index 0 is used.
// k takes the values {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_dim)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5598
// Full 1x8 tile at k = 16 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5612
// Full 1x8 tile at k = 16 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5626
// Full 1x8 tile at k = 16 with cm_stride set to 11 (larger than n = 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5640 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5641
5642
5643 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Baseline case for the 1x8c4 neon mlal_ld2r micro-kernel: full 1x8 tile
// with k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5656
// Full 1x8 tile at k = 16 with cn_stride set to 11 (larger than nr = 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5670
// k = 16 over every sub-tile: n in [1, 8] and m in [1, 1]; one iteration per
// configuration. The m loop runs once because mr = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5688
// k = 16, n = 8, varying only m over [1, 1] (a single value since mr = 1);
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(m_dim).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5704
// k = 16, m = 1, varying only n over [1, 8]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(n_dim).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5720
// Full 1x8 tile for every k in [1, 15], i.e. all values below 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5735
// Every k in [1, 15] combined with every sub-tile (n in [1, 8], m in [1, 1]);
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5755
// Full 1x8 tile for every k in [17, 31], i.e. above 16 and below 32.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5770
// Every k in [17, 31] combined with every sub-tile (n in [1, 8],
// m in [1, 1]); one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5790
// Full 1x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5805
// k = 32, 48, ..., 160 combined with every sub-tile (n in [1, 8],
// m in [1, 1]); one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5825
// m = 1 with n in [9, 15] (above nr = 8, below 2 * nr) and
// k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5842
// Same sweep as n_gt_8 (n in [9, 15], k in {1, 18, 35, 52, 69}, m = 1),
// but with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5860
// n in [9, 15] and k in {1, 18, 35, 52, 69} for every m in [1, 1];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5880
// m = 1 with n in {16, 24} (multiples of nr = 8) and
// k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5897
// Same sweep as n_div_8 (n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 1),
// but with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5915
// n in {16, 24} and k in {1, 18, 35, 52, 69} for every m in [1, 1];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5935
// Full 1x8 tile (m = 1, n = 8) with ks = 3, for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5951
// ks = 3 case over every sub-tile: n in [1, 8], m in [1, 1], and
// k in {1, 18, 35, 52, 69}; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5972
// ks = 3 case with n in [9, 15] and k in {1, 18, 35, 52, 69}; m is fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5990
// ks = 3 case with n in {16, 24} and k in {1, 18, 35, 52, 69};
// m is fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6008
// Every sub-tile (n in [1, 8], m in [1, 1]) with cm_stride set to 11, for
// k in {1, 18, 35, 52, 69}; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6029
// Full 1x8 tile with ks = 3 and a_offset = 83, for k in {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
6046
// Full 1x8 tile with ks = 3 and a_offset = 83, additionally setting
// zero_index to each value in [0, mr). With mr = 1 only index 0 is used.
// k takes the values {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_dim)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6066
// Single run (m = 1, n = 8, k = 16) with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6080
// Single run (m = 1, n = 8, k = 16) with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6094
// Single run (m = 1, n = 8, k = 16) with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
6108 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6109
6110
6111 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 1, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6124
// Single run (m = 1, n = 8, k = 16) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6138
// k = 16 with n swept over 1..8 (the m loop covers only m = 1); one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6156
// k = 16, n = 8, with the m loop covering only m = 1; one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6172
// k = 16, m = 1, with n swept over 1..8; one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(n_size).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6188
// m = 1, n = 8, with k swept over 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6203
// k swept over 1..15 and n over 1..8 (the m loop covers only m = 1); one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6223
// m = 1, n = 8, with k swept over 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6238
// k swept over 17..31 and n over 1..8 (the m loop covers only m = 1); one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6258
// m = 1, n = 8, with k stepping from 32 to 160 in increments of 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6273
// k stepping 32..160 by 16 and n over 1..8 (the m loop covers only m = 1); one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6293
// m = 1 with n swept over 9..15 and k stepping 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6310
// cn_stride = 11 with n swept over 9..15 and k stepping 1, 18, 35, 52, 69 (m = 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6328
// n swept over 9..15 and k over 1, 18, ..., 69 (the m loop covers only m = 1); one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6348
// m = 1 with n in {16, 24} and k stepping 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6365
// cn_stride = 11 with n in {16, 24} and k stepping 1, 18, 35, 52, 69 (m = 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6383
// n in {16, 24} and k over 1, 18, ..., 69 (the m loop covers only m = 1); one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6403
// ks = 3 with m = 1, n = 8 and k stepping 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6419
// ks = 3 over k in 1, 18, ..., 69 and n in 1..8 (the m loop covers only m = 1); one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6440
// ks = 3 with n swept over 9..15 and k stepping 1, 18, 35, 52, 69 (m = 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6458
// ks = 3 with n in {16, 24} and k stepping 1, 18, 35, 52, 69 (m = 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6476
// cm_stride = 11 over k in 1, 18, ..., 69 and n in 1..8 (the m loop covers only m = 1); one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6497
// ks = 3 with a_offset = 83, m = 1, n = 8, k stepping 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6514
// ks = 3, a_offset = 83, and zero_index swept over [0, 1) for k in 1, 18, ..., 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6534
// Single run (m = 1, n = 8, k = 16) with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6548
// Single run (m = 1, n = 8, k = 16) with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6562
// Single run (m = 1, n = 8, k = 16) with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6576 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6577
6578
6579 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 1, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6592
// Single run (m = 1, n = 8, k = 16) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
6606
// k = 16 with n swept over 1..8 (the m loop covers only m = 1); one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6624
// k = 16, n = 8, with the m loop covering only m = 1; one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6640
// k = 16, m = 1, with n swept over 1..8; one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(n_size).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6656
// m = 1, n = 8, with k swept over 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6671
// k swept over 1..15 and n over 1..8 (the m loop covers only m = 1); one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6691
// m = 1, n = 8, with k swept over 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6706
// k swept over 17..31 and n over 1..8 (the m loop covers only m = 1); one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6726
// m = 1, n = 8, with k stepping from 32 to 160 in increments of 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6741
// k stepping 32..160 by 16 and n over 1..8 (the m loop covers only m = 1); one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6761
// m = 1 with n swept over 9..15 and k stepping 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6778
// cn_stride = 11 with n swept over 9..15 and k stepping 1, 18, 35, 52, 69 (m = 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6796
// n swept over 9..15 and k over 1, 18, ..., 69 (the m loop covers only m = 1); one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6816
// m = 1 with n in {16, 24} and k stepping 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6833
// cn_stride = 11 with n in {16, 24} and k stepping 1, 18, 35, 52, 69 (m = 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6851
// n in {16, 24} and k over 1, 18, ..., 69 (the m loop covers only m = 1); one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6871
// ks = 3 with m = 1, n = 8 and k stepping 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6887
// ks = 3 over k in 1, 18, ..., 69 and n in 1..8 (the m loop covers only m = 1); one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6908
// ks = 3 with n swept over 9..15 and k stepping 1, 18, 35, 52, 69 (m = 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6926
// ks = 3 with n in {16, 24} and k stepping 1, 18, 35, 52, 69 (m = 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
6944
// cm_stride = 11 over k in 1, 18, ..., 69 and n in 1..8 (the m loop covers only m = 1); one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
6965
// ks = 3 with a_offset = 83, m = 1, n = 8, k stepping 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
6982
// ks = 3, a_offset = 83, and zero_index swept over [0, 1) for k in 1, 18, ..., 69.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7002
// Single run (m = 1, n = 8, k = 16) with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7016
// Single run (m = 1, n = 8, k = 16) with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7030
// Single run (m = 1, n = 8, k = 16) with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7044 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7045
7046
7047 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 1, n = 8, k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7060
// Single run (m = 1, n = 8, k = 16) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7074
// k = 16 with n swept over 1..8 (the m loop covers only m = 1); one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7092
// k = 16, n = 8, with the m loop covering only m = 1; one iteration per run.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7108
// Sweeps n over [1, 8] with m=1, k=16, one iteration per configuration, on
// the 1x8c4 neonv8_mlal_ld2r microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n_size).k(16)
        .iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7124
// Sweeps k over [1, 16) with m=1, n=8 on the 1x8c4 neonv8_mlal_ld2r
// microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7139
// Sweeps k over [1, 16), n over [1, 8] and m over [1, 1], one iteration per
// configuration, on the 1x8c4 neonv8_mlal_ld2r microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7159
// Sweeps k over [17, 32) with m=1, n=8 on the 1x8c4 neonv8_mlal_ld2r
// microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7174
// Sweeps k over [17, 32), n over [1, 8] and m over [1, 1], one iteration per
// configuration, on the 1x8c4 neonv8_mlal_ld2r microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7194
// Sweeps k over 32..160 in steps of 16 with m=1, n=8 on the 1x8c4
// neonv8_mlal_ld2r microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7209
// Sweeps k over 32..160 in steps of 16, n over [1, 8] and m over [1, 1], one
// iteration per configuration, on the 1x8c4 neonv8_mlal_ld2r microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7229
// Sweeps n over [9, 16) and k over 1..80 in steps of 17 with m=1 on the
// 1x8c4 neonv8_mlal_ld2r microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7246
// Sweeps n over [9, 16) and k over 1..80 in steps of 17 with m=1 and
// cn_stride set to 11 on the 1x8c4 neonv8_mlal_ld2r microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7264
// Sweeps n over [9, 16), k over 1..80 in steps of 17 and m over [1, 1], one
// iteration per configuration, on the 1x8c4 neonv8_mlal_ld2r microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7284
// Sweeps n over 16..24 in steps of 8 and k over 1..80 in steps of 17 with
// m=1 on the 1x8c4 neonv8_mlal_ld2r microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7301
// Sweeps n over 16..24 in steps of 8 and k over 1..80 in steps of 17 with
// m=1 and cn_stride set to 11 on the 1x8c4 neonv8_mlal_ld2r microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7319
// Sweeps n over 16..24 in steps of 8, k over 1..80 in steps of 17 and m over
// [1, 1], one iteration per configuration, on the 1x8c4 neonv8_mlal_ld2r
// microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7339
// Sweeps k over 1..80 in steps of 17 with m=1, n=8 and ks set to 3 on the
// 1x8c4 neonv8_mlal_ld2r microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7355
// Sweeps k over 1..80 in steps of 17, n over [1, 8] and m over [1, 1] with
// ks set to 3, one iteration per configuration, on the 1x8c4
// neonv8_mlal_ld2r microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7376
// Sweeps n over [9, 16) and k over 1..80 in steps of 17 with m=1 and ks set
// to 3 on the 1x8c4 neonv8_mlal_ld2r microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7394
// Sweeps n over 16..24 in steps of 8 and k over 1..80 in steps of 17 with
// m=1 and ks set to 3 on the 1x8c4 neonv8_mlal_ld2r microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7412
// Sweeps k over 1..80 in steps of 17, n over [1, 8] and m over [1, 1] with
// cm_stride set to 11, one iteration per configuration, on the 1x8c4
// neonv8_mlal_ld2r microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7433
// Sweeps k over 1..80 in steps of 17 with m=1, n=8, ks set to 3 and a_offset
// set to 83 on the 1x8c4 neonv8_mlal_ld2r microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(83);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
7450
// Sweeps k over 1..80 in steps of 17 and zero_index over [0, 1) with m=1,
// n=8, ks set to 3 and a_offset set to 83 on the 1x8c4 neonv8_mlal_ld2r
// microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(8).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7470
// Runs the 1x8c4 neonv8_mlal_ld2r microkernel with m=1, n=8, k=16 and qmin
// set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7484
// Runs the 1x8c4 neonv8_mlal_ld2r microkernel with m=1, n=8, k=16 and qmax
// set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7498
// Runs the 1x8c4 neonv8_mlal_ld2r microkernel with m=1, n=8, k=16 and
// cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7512 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7513
7514
7515 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the 2x8 neon_mlal_lane_prfm microkernel once with m=2, n=8, k=8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
7528
// Runs the 2x8 neon_mlal_lane_prfm microkernel with m=2, n=8, k=8 and
// cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
7542
// Sweeps n over [1, 8] and m over [1, 2] at k=8, one iteration per
// configuration, on the 2x8 neon_mlal_lane_prfm microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7560
// Sweeps m over [1, 2] with n=8, k=8, one iteration per configuration, on
// the 2x8 neon_mlal_lane_prfm microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(m_size).n(8).k(8)
        .iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7576
// Sweeps n over [1, 8] with m=2, k=8, one iteration per configuration, on
// the 2x8 neon_mlal_lane_prfm microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(n_size).k(8)
        .iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7592
// Sweeps k over [1, 8) with m=2, n=8 on the 2x8 neon_mlal_lane_prfm
// microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7607
// Sweeps k over [1, 8), n over [1, 8] and m over [1, 2], one iteration per
// configuration, on the 2x8 neon_mlal_lane_prfm microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7627
// Sweeps k over [9, 16) with m=2, n=8 on the 2x8 neon_mlal_lane_prfm
// microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7642
// Sweeps k over [9, 16), n over [1, 8] and m over [1, 2], one iteration per
// configuration, on the 2x8 neon_mlal_lane_prfm microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7662
// Sweeps k over 16..80 in steps of 8 with m=2, n=8 on the 2x8
// neon_mlal_lane_prfm microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7677
// Sweeps k over 16..80 in steps of 8, n over [1, 8] and m over [1, 2], one
// iteration per configuration, on the 2x8 neon_mlal_lane_prfm microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7697
// Sweeps n over [9, 16) and k over 1..40 in steps of 9 with m=2 on the 2x8
// neon_mlal_lane_prfm microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7714
// Sweeps n over [9, 16) and k over 1..40 in steps of 9 with m=2 and
// cn_stride set to 11 on the 2x8 neon_mlal_lane_prfm microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7732
// Sweeps n over [9, 16), k over 1..40 in steps of 9 and m over [1, 2], one
// iteration per configuration, on the 2x8 neon_mlal_lane_prfm microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7752
// Sweeps n over 16..24 in steps of 8 and k over 1..40 in steps of 9 with m=2
// on the 2x8 neon_mlal_lane_prfm microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7769
// Sweeps n over 16..24 in steps of 8 and k over 1..40 in steps of 9 with m=2
// and cn_stride set to 11 on the 2x8 neon_mlal_lane_prfm microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7787
// Sweeps n over 16..24 in steps of 8, k over 1..40 in steps of 9 and m over
// [1, 2], one iteration per configuration, on the 2x8 neon_mlal_lane_prfm
// microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7807
// Sweeps k over 1..40 in steps of 9 with m=2, n=8 and ks set to 3 on the 2x8
// neon_mlal_lane_prfm microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(k_size)
        .ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7823
// Sweeps k over 1..40 in steps of 9, n over [1, 8] and m over [1, 2] with ks
// set to 3, one iteration per configuration, on the 2x8 neon_mlal_lane_prfm
// microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7844
// Sweeps n over [9, 16) and k over 1..40 in steps of 9 with m=2 and ks set
// to 3 on the 2x8 neon_mlal_lane_prfm microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7862
// Sweeps n over 16..24 in steps of 8 and k over 1..40 in steps of 9 with m=2
// and ks set to 3 on the 2x8 neon_mlal_lane_prfm microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7880
// Sweeps k over 1..40 in steps of 9, n over [1, 8] and m over [1, 2] with
// cm_stride set to 11, one iteration per configuration, on the 2x8
// neon_mlal_lane_prfm microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
7901
// Sweeps k over 1..40 in steps of 9 with m=2, n=8, ks set to 3 and a_offset
// set to 83 on the 2x8 neon_mlal_lane_prfm microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(k_size)
        .ks(3)
        .a_offset(83);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
7918
// Sweeps k over 1..40 in steps of 9 and zero_index over [0, 2) with m=2,
// n=8, ks set to 3 and a_offset set to 83 on the 2x8 neon_mlal_lane_prfm
// microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(2).n(8).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
7938
// Runs the 2x8 neon_mlal_lane_prfm microkernel with m=2, n=8, k=8 and qmin
// set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
7952
// Runs the 2x8 neon_mlal_lane_prfm microkernel with m=2, n=8, k=8 and qmax
// set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
7966
// Runs the 2x8 neon_mlal_lane_prfm microkernel with m=2, n=8, k=8 and
// cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
7980 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7981
7982
7983 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the 2x8 neonv8_mlal_lane microkernel once with m=2, n=8, k=8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
7996
// Runs the 2x8 neonv8_mlal_lane microkernel with m=2, n=8, k=8 and cn_stride
// set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester()
      .mr(2).nr(8).kr(1).sr(1)
      .m(2).n(8).k(8)
      .cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
8010
// Sweeps n over [1, 8] and m over [1, 2] at k=8, one iteration per
// configuration, on the 2x8 neonv8_mlal_lane microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      auto tester = GemmMicrokernelTester()
          .mr(2).nr(8).kr(1).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
8028
// Sweeps m over [1, 2] with n=8, k=8, one iteration per configuration, on
// the 2x8 neonv8_mlal_lane microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(m_size).n(8).k(8)
        .iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8044
// Sweeps n over [1, 8] with m=2, k=8, one iteration per configuration, on
// the 2x8 neonv8_mlal_lane microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(n_size).k(8)
        .iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8060
// Sweeps k over [1, 8) with m=2, n=8 on the 2x8 neonv8_mlal_lane
// microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8075
// Sweeps k over [1, 8), n over [1, 8] and m over [1, 2], one iteration per
// configuration, on the 2x8 neonv8_mlal_lane microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8095
// Sweeps k over [9, 16) with m=2, n=8 on the 2x8 neonv8_mlal_lane
// microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8110
// Sweeps k over [9, 16), n over [1, 8] and m over [1, 2], one iteration per
// configuration, on the 2x8 neonv8_mlal_lane microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(2).nr(8).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
8130
// Sweeps k over 16..80 in steps of 8 with m=2, n=8 on the 2x8
// neonv8_mlal_lane microkernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(2).nr(8).kr(1).sr(1)
        .m(2).n(8).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
8145
// Runs every (m, n) sub-tile of the 2x8 tile for k = 16, 24, ..., 80, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
8165
// Runs m = 2 with each n in [9, 16) and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
8182
// Same sweep as n_gt_8 (n in [9, 16), k = 1, 10, ..., 37) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth).cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
8200
// Runs each n in [9, 16) with k = 1, 10, ..., 37 and m in {1, 2}, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
8220
// Runs m = 2 with n in {16, 24} and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
8237
// Same sweep as n_div_8 (n in {16, 24}, k = 1, 10, ..., 37) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth).cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
8255
// Runs n in {16, 24} with k = 1, 10, ..., 37 and m in {1, 2}, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
8275
// Runs the full 2x8 tile with ks = 3 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth).ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
8291
// Runs every (m, n) sub-tile of the 2x8 tile with ks = 3 for k = 1, 10, ..., 37, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
8312
// Runs m = 2 and ks = 3 with each n in [9, 16) and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth).ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
8330
// Runs m = 2 and ks = 3 with n in {16, 24} and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth).ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
8348
// Runs every (m, n) sub-tile of the 2x8 tile with cm_stride = 11 for k = 1, 10, ..., 37, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).cm_stride(11).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
8369
// Runs the full 2x8 tile with ks = 3 and a_offset = 83 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth).ks(3).a_offset(83);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
8386
// Runs the full 2x8 tile with ks = 3 and a_offset = 83, trying zero_index 0 and 1 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(8).k(depth).ks(3).a_offset(83).zero_index(zero_row);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
8406
// Runs the full 2x8 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8).qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
8420
// Runs the full 2x8 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8).qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
8434
// Runs the full 2x8 tile at k = 8 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8).cm_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
8448 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8449
8450
8451 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the full 2x8 tile at k = 8 on the prfm variant of the kernel.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
8464
// Runs the full 2x8 tile at k = 8 with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8).cn_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
8478
// Runs every (m, n) sub-tile of the 2x8 tile at k = 8, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(rows).n(cols).k(8).iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
8496
// Varies m over {1, 2} with n = 8 and k = 8, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(rows).n(8).k(8).iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
8512
// Varies n over [1, 8] with m = 2 and k = 8, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(cols).k(8).iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
8528
// Runs the full 2x8 tile for each k in [1, 8).
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
8543
// Runs every (m, n) sub-tile of the 2x8 tile for each k in [1, 8), one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
8563
// Runs the full 2x8 tile for each k in [9, 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
8578
// Runs every (m, n) sub-tile of the 2x8 tile for each k in [9, 16), one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
8598
// Runs the full 2x8 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
8613
// Runs every (m, n) sub-tile of the 2x8 tile for k = 16, 24, ..., 80, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
8633
// Runs m = 2 with each n in [9, 16) and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
8650
// Same sweep as n_gt_8 (n in [9, 16), k = 1, 10, ..., 37) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth).cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
8668
// Runs each n in [9, 16) with k = 1, 10, ..., 37 and m in {1, 2}, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
8688
// Runs m = 2 with n in {16, 24} and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
8705
// Same sweep as n_div_8 (n in {16, 24}, k = 1, 10, ..., 37) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth).cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
8723
// Runs n in {16, 24} with k = 1, 10, ..., 37 and m in {1, 2}, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
8743
// Runs the full 2x8 tile with ks = 3 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth).ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
8759
// Runs every (m, n) sub-tile of the 2x8 tile with ks = 3 for k = 1, 10, ..., 37, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
8780
// Runs m = 2 and ks = 3 with each n in [9, 16) and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth).ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
8798
// Runs m = 2 and ks = 3 with n in {16, 24} and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(cols).k(depth).ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
8816
// Runs every (m, n) sub-tile of the 2x8 tile with cm_stride = 11 for k = 1, 10, ..., 37, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth).cm_stride(11).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
8837
// Runs the full 2x8 tile with ks = 3 and a_offset = 83 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(1).sr(1);
    tester.m(2).n(8).k(depth).ks(3).a_offset(83);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
8854
// Runs the full 2x8 tile with ks = 3 and a_offset = 83, trying zero_index 0 and 1 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 2; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(1).sr(1);
      tester.m(2).n(8).k(depth).ks(3).a_offset(83).zero_index(zero_row);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
8874
// Runs the full 2x8 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8).qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
8888
// Runs the full 2x8 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8).qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
8902
// Runs the full 2x8 tile at k = 8 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(1).sr(1);
  tester.m(2).n(8).k(8).cm_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
8916 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8917
8918
8919 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the full 2x8 tile (kr = 2) at k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
8932
// Runs the full 2x8 tile (kr = 2) at k = 16 with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(2).sr(1);
  tester.m(2).n(8).k(16).cn_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
}
8946
// Runs every (m, n) sub-tile of the 2x8 tile (kr = 2) at k = 16, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(rows).n(cols).k(16).iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
8964
// Varies m over {1, 2} with n = 8 and k = 16, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(rows).n(8).k(16).iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
8980
// Varies n over [1, 8] with m = 2 and k = 16, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(cols).k(16).iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
8996
// Runs the full 2x8 tile (kr = 2) for each k in [1, 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
9011
// Runs every (m, n) sub-tile of the 2x8 tile (kr = 2) for each k in [1, 16), one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
9031
// Runs the full 2x8 tile (kr = 2) for each k in [17, 32).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
9046
// Runs every (m, n) sub-tile of the 2x8 tile (kr = 2) for each k in [17, 32), one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
9066
// Runs the full 2x8 tile (kr = 2) for k = 32, 48, ..., 160 (multiples of 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(2).sr(1);
    tester.m(2).n(8).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
  }
}
9081
// Runs every (m, n) sub-tile of the 2x8 tile (kr = 2) for k = 32, 48, ..., 160, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
9101
// Runs m = 2 (kr = 2) with each n in [9, 16) and k = 1, 18, ..., 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
9118
// Same sweep as n_gt_8 (n in [9, 16), k = 1, 18, ..., 69) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(cols).k(depth).cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
9136
// Runs each n in [9, 16) with k = 1, 18, ..., 69 and m in {1, 2}, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                    xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
9156
// Runs m = 2 (kr = 2) with n in {16, 24} and k = 1, 18, ..., 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
9173
// Same sweep as n_div_8 (n in {16, 24}, k = 1, 18, ..., 69) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(2).sr(1);
      tester.m(2).n(cols).k(depth).cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
9191
// Runs n = 16 and n = 24 against k = 1, 18, 35, 52, 69 and m = 1..2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
9211
// Runs the m = 2, n = 8 case with ks(3) over k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_dim)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
9227
// With ks(3): sweeps k = 1, 18, 35, 52, 69, n = 1..8 and m = 1..2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
9248
// With ks(3): sweeps n = 9..15 and k = 1, 18, 35, 52, 69 at m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
9266
// With ks(3): runs n = 16 and n = 24 against k = 1, 18, 35, 52, 69 at m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
9284
// With cm_stride(11): sweeps k = 1, 18, 35, 52, 69, n = 1..8 and m = 1..2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
9305
// With ks(3) and a_offset(163): runs m = 2, n = 8 over k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_dim)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
9322
// With ks(3) and a_offset(163): tries zero_index 0 and 1 for each k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(8).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
9342
// Single run at m = 2, n = 8, k = 16 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
9356
// Single run at m = 2, n = 8, k = 16 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
9370
// Single run at m = 2, n = 8, k = 16 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
9384 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9385
9386
9387 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at m = 2, n = 8, k = 16 with no additional tester options.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
9400
// Single run at m = 2, n = 8, k = 16 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
9414
// At k = 16: sweeps n = 1..8 and m = 1..2, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(16)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
9432
// At k = 16 and n = 8: sweeps m = 1..2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(m_dim).n(8).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
9448
// At k = 16 and m = 2: sweeps n = 1..8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_dim).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
9464
// Runs m = 2, n = 8 for every k in 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
9479
// Sweeps k = 1..15, n = 1..8 and m = 1..2, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
9499
// Runs m = 2, n = 8 for every k in 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
9514
// Sweeps k = 17..31, n = 1..8 and m = 1..2, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
9534
// Runs m = 2, n = 8 for k = 32, 48, ..., 160 (step 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
9549
// Sweeps k = 32, 48, ..., 160, n = 1..8 and m = 1..2, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
9569
// Sweeps n = 9..15 and k = 1, 18, 35, 52, 69 at m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
9586
// Sweeps n = 9..15 and k = 1, 18, 35, 52, 69 at m = 2 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
9604
// Sweeps n = 9..15, k = 1, 18, 35, 52, 69 and m = 1..2, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
9624
// Runs n = 16 and n = 24 against k = 1, 18, 35, 52, 69 at m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
9641
// Runs n = 16 and n = 24 against k = 1, 18, 35, 52, 69 at m = 2 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
9659
// Runs n = 16 and n = 24 against k = 1, 18, 35, 52, 69 and m = 1..2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
9679
// Runs the m = 2, n = 8 case with ks(3) over k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_dim)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
9695
// With ks(3): sweeps k = 1, 18, 35, 52, 69, n = 1..8 and m = 1..2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
9716
// With ks(3): sweeps n = 9..15 and k = 1, 18, 35, 52, 69 at m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
9734
// With ks(3): runs n = 16 and n = 24 against k = 1, 18, 35, 52, 69 at m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
9752
// With cm_stride(11): sweeps k = 1, 18, 35, 52, 69, n = 1..8 and m = 1..2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
9773
// With ks(3) and a_offset(163): runs m = 2, n = 8 over k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_dim)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
9790
// With ks(3) and a_offset(163): tries zero_index 0 and 1 for each k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(8).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
9810
// Single run at m = 2, n = 8, k = 16 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
9824
// Single run at m = 2, n = 8, k = 16 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
9838
// Single run at m = 2, n = 8, k = 16 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld4r,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
9852 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9853
9854
9855 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at m = 2, n = 8, k = 16 with no additional tester options.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
9868
// Single run at m = 2, n = 8, k = 16 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
9882
// At k = 16: sweeps n = 1..8 and m = 1..2, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(16)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
9900
// At k = 16 and n = 8: sweeps m = 1..2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(m_dim).n(8).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
9916
// At k = 16 and m = 2: sweeps n = 1..8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(n_dim).k(16)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
9932
// Runs m = 2, n = 8 for every k in 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
9947
// Sweeps k = 1..15, n = 1..8 and m = 1..2, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
9967
// Runs m = 2, n = 8 for every k in 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
9982
// Sweeps k = 17..31, n = 1..8 and m = 1..2, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
10002
// Runs m = 2, n = 8 for k = 32, 48, ..., 160 (step 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
10017
// Sweeps k = 32, 48, ..., 160, n = 1..8 and m = 1..2, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
10037
// Sweeps n = 9..15 and k = 1, 18, 35, 52, 69 at m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10054
// Sweeps n = 9..15 and k = 1, 18, 35, 52, 69 at m = 2 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10072
// Sweeps n = 9..15, k = 1, 18, 35, 52, 69 and m = 1..2, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
10092
// Runs n = 16 and n = 24 against k = 1, 18, 35, 52, 69 at m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10109
// Runs n = 16 and n = 24 against k = 1, 18, 35, 52, 69 at m = 2 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10127
// Runs n = 16 and n = 24 against k = 1, 18, 35, 52, 69 and m = 1..2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
10147
// Runs the m = 2, n = 8 case with ks(3) over k = 1, 18, 35, 52, 69.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k_dim)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
10163
// With ks(3): sweeps k = 1, 18, 35, 52, 69, n = 1..8 and m = 1..2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
10184
// With ks(3): sweeps n = 9..15 and k = 1, 18, 35, 52, 69 at m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10202
// With ks(3): runs n = 16 and n = 24 against k = 1, 18, 35, 52, 69 at m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
10220
// With cm_stride(11): sweeps k = 1, 18, 35, 52, 69, n = 1..8 and m = 1..2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
10241
// Sweeps k over {1, 18, 35, 52, 69} on the full 2x8 tile with ks(3) and
// a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    auto tester = GemmMicrokernelTester();
    // Kernel tile geometry.
    tester.mr(2).nr(8).kr(2).sr(4);
    // Problem shape for this iteration.
    tester.m(2).n(8).k(k).ks(3).a_offset(163);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
10258
// Sweeps k over {1, 18, 35, 52, 69} and zero_index over {0, 1} on the full
// 2x8 tile with ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      auto tester = GemmMicrokernelTester();
      // Kernel tile geometry.
      tester.mr(2).nr(8).kr(2).sr(4);
      // Problem shape for this iteration.
      tester.m(2).n(8).k(k).ks(3).a_offset(163).zero_index(mz);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
10278
// Full 2x8 tile at k = 16 with qmin(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  // Kernel tile geometry.
  tester.mr(2).nr(8).kr(2).sr(4);
  // Problem shape and output clamp setting.
  tester.m(2).n(8).k(16).qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10292
// Full 2x8 tile at k = 16 with qmax(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  // Kernel tile geometry.
  tester.mr(2).nr(8).kr(2).sr(4);
  // Problem shape and output clamp setting.
  tester.m(2).n(8).k(16).qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10306
// Full 2x8 tile at k = 16 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  // Kernel tile geometry.
  tester.mr(2).nr(8).kr(2).sr(4);
  // Problem shape and output row stride.
  tester.m(2).n(8).k(16).cm_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
10320 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10321
10322
10323 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 2x8 tile at k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  // Kernel tile geometry.
  tester.mr(2).nr(8).kr(2).sr(4);
  // Problem shape.
  tester.m(2).n(8).k(16);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
10336
// Full 2x8 tile at k = 16 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  // Kernel tile geometry.
  tester.mr(2).nr(8).kr(2).sr(4);
  // Problem shape and output column-block stride.
  tester.m(2).n(8).k(16).cn_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
10350
// Every m in [1, 2] x n in [1, 8] sub-tile at k = 16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      auto tester = GemmMicrokernelTester();
      // Kernel tile geometry.
      tester.mr(2).nr(8).kr(2).sr(4);
      // Problem shape for this iteration.
      tester.m(m).n(n).k(16).iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10368
// Sweeps m over [1, 2] with n = 8 and k = 16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 2; ++m) {
    auto tester = GemmMicrokernelTester();
    // Kernel tile geometry.
    tester.mr(2).nr(8).kr(2).sr(4);
    // Problem shape for this iteration.
    tester.m(m).n(8).k(16).iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10384
// Sweeps n over [1, 8] with m = 2 and k = 16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    auto tester = GemmMicrokernelTester();
    // Kernel tile geometry.
    tester.mr(2).nr(8).kr(2).sr(4);
    // Problem shape for this iteration.
    tester.m(2).n(n).k(16).iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10400
// Sweeps k over [1, 15] on the full 2x8 tile.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; ++k) {
    auto tester = GemmMicrokernelTester();
    // Kernel tile geometry.
    tester.mr(2).nr(8).kr(2).sr(4);
    // Problem shape for this iteration.
    tester.m(2).n(8).k(k);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10415
// Sweeps k over [1, 15] and every m in [1, 2] x n in [1, 8] sub-tile,
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester();
        // Kernel tile geometry.
        tester.mr(2).nr(8).kr(2).sr(4);
        // Problem shape for this iteration.
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10435
// Sweeps k over [17, 31] on the full 2x8 tile.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; ++k) {
    auto tester = GemmMicrokernelTester();
    // Kernel tile geometry.
    tester.mr(2).nr(8).kr(2).sr(4);
    // Problem shape for this iteration.
    tester.m(2).n(8).k(k);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10450
// Sweeps k over [17, 31] and every m in [1, 2] x n in [1, 8] sub-tile,
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester();
        // Kernel tile geometry.
        tester.mr(2).nr(8).kr(2).sr(4);
        // Problem shape for this iteration.
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10470
// Sweeps k over multiples of 16 from 32 through 160 on the full 2x8 tile.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    auto tester = GemmMicrokernelTester();
    // Kernel tile geometry.
    tester.mr(2).nr(8).kr(2).sr(4);
    // Problem shape for this iteration.
    tester.m(2).n(8).k(k);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10485
// Sweeps k over multiples of 16 from 32 through 160 and every
// m in [1, 2] x n in [1, 8] sub-tile, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester();
        // Kernel tile geometry.
        tester.mr(2).nr(8).kr(2).sr(4);
        // Problem shape for this iteration.
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10505
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester();
      // Kernel tile geometry.
      tester.mr(2).nr(8).kr(2).sr(4);
      // Problem shape for this iteration.
      tester.m(2).n(n).k(k);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10522
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m = 2 and
// cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester();
      // Kernel tile geometry.
      tester.mr(2).nr(8).kr(2).sr(4);
      // Problem shape for this iteration.
      tester.m(2).n(n).k(k).cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10540
// Sweeps n over [9, 15], k over {1, 18, 35, 52, 69} and m over [1, 2],
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester();
        // Kernel tile geometry.
        tester.mr(2).nr(8).kr(2).sr(4);
        // Problem shape for this iteration.
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10560
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester();
      // Kernel tile geometry.
      tester.mr(2).nr(8).kr(2).sr(4);
      // Problem shape for this iteration.
      tester.m(2).n(n).k(k);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10577
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2 and
// cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester();
      // Kernel tile geometry.
      tester.mr(2).nr(8).kr(2).sr(4);
      // Problem shape for this iteration.
      tester.m(2).n(n).k(k).cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10595
// Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over [1, 2],
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester();
        // Kernel tile geometry.
        tester.mr(2).nr(8).kr(2).sr(4);
        // Problem shape for this iteration.
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10615
// Sweeps k over {1, 18, 35, 52, 69} on the full 2x8 tile with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    auto tester = GemmMicrokernelTester();
    // Kernel tile geometry.
    tester.mr(2).nr(8).kr(2).sr(4);
    // Problem shape for this iteration.
    tester.m(2).n(8).k(k).ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10631
// Sweeps k over {1, 18, 35, 52, 69} and every m in [1, 2] x n in [1, 8]
// sub-tile with ks(3), running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester();
        // Kernel tile geometry.
        tester.mr(2).nr(8).kr(2).sr(4);
        // Problem shape for this iteration.
        tester.m(m).n(n).k(k).ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10652
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m = 2 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester();
      // Kernel tile geometry.
      tester.mr(2).nr(8).kr(2).sr(4);
      // Problem shape for this iteration.
      tester.m(2).n(n).k(k).ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10670
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester();
      // Kernel tile geometry.
      tester.mr(2).nr(8).kr(2).sr(4);
      // Problem shape for this iteration.
      tester.m(2).n(n).k(k).ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10688
// Sweeps k over {1, 18, 35, 52, 69} and every m in [1, 2] x n in [1, 8]
// sub-tile with cm_stride(11), running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester();
        // Kernel tile geometry.
        tester.mr(2).nr(8).kr(2).sr(4);
        // Problem shape for this iteration.
        tester.m(m).n(n).k(k).cm_stride(11).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10709
// Sweeps k over {1, 18, 35, 52, 69} on the full 2x8 tile with ks(3) and
// a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    auto tester = GemmMicrokernelTester();
    // Kernel tile geometry.
    tester.mr(2).nr(8).kr(2).sr(4);
    // Problem shape for this iteration.
    tester.m(2).n(8).k(k).ks(3).a_offset(163);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10726
// Sweeps k over {1, 18, 35, 52, 69} and zero_index over {0, 1} on the full
// 2x8 tile with ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      auto tester = GemmMicrokernelTester();
      // Kernel tile geometry.
      tester.mr(2).nr(8).kr(2).sr(4);
      // Problem shape for this iteration.
      tester.m(2).n(8).k(k).ks(3).a_offset(163).zero_index(mz);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10746
// Full 2x8 tile at k = 16 with qmin(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  // Kernel tile geometry.
  tester.mr(2).nr(8).kr(2).sr(4);
  // Problem shape and output clamp setting.
  tester.m(2).n(8).k(16).qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
10760
// Full 2x8 tile at k = 16 with qmax(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  // Kernel tile geometry.
  tester.mr(2).nr(8).kr(2).sr(4);
  // Problem shape and output clamp setting.
  tester.m(2).n(8).k(16).qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
10774
// Full 2x8 tile at k = 16 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  // Kernel tile geometry.
  tester.mr(2).nr(8).kr(2).sr(4);
  // Problem shape and output row stride.
  tester.m(2).n(8).k(16).cm_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
10788 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10789
10790
10791 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 2x8 tile at k = 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  // Kernel tile geometry.
  tester.mr(2).nr(8).kr(4).sr(1);
  // Problem shape.
  tester.m(2).n(8).k(16);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
10804
// Full 2x8 tile at k = 16 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  // Kernel tile geometry.
  tester.mr(2).nr(8).kr(4).sr(1);
  // Problem shape and output column-block stride.
  tester.m(2).n(8).k(16).cn_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
10818
// Every m in [1, 2] x n in [1, 8] sub-tile at k = 16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      auto tester = GemmMicrokernelTester();
      // Kernel tile geometry.
      tester.mr(2).nr(8).kr(4).sr(1);
      // Problem shape for this iteration.
      tester.m(m).n(n).k(16).iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10836
// Sweeps m over [1, 2] with n = 8 and k = 16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 2; ++m) {
    auto tester = GemmMicrokernelTester();
    // Kernel tile geometry.
    tester.mr(2).nr(8).kr(4).sr(1);
    // Problem shape for this iteration.
    tester.m(m).n(8).k(16).iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10852
// Sweeps n over [1, 8] with m = 2 and k = 16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    auto tester = GemmMicrokernelTester();
    // Kernel tile geometry.
    tester.mr(2).nr(8).kr(4).sr(1);
    // Problem shape for this iteration.
    tester.m(2).n(n).k(16).iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10868
// Sweeps k over [1, 15] on the full 2x8 tile.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; ++k) {
    auto tester = GemmMicrokernelTester();
    // Kernel tile geometry.
    tester.mr(2).nr(8).kr(4).sr(1);
    // Problem shape for this iteration.
    tester.m(2).n(8).k(k);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10883
// Sweeps k over [1, 15] and every m in [1, 2] x n in [1, 8] sub-tile,
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester();
        // Kernel tile geometry.
        tester.mr(2).nr(8).kr(4).sr(1);
        // Problem shape for this iteration.
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10903
// Sweeps k over [17, 31] on the full 2x8 tile.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; ++k) {
    auto tester = GemmMicrokernelTester();
    // Kernel tile geometry.
    tester.mr(2).nr(8).kr(4).sr(1);
    // Problem shape for this iteration.
    tester.m(2).n(8).k(k);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10918
// Sweeps k over [17, 31] and every m in [1, 2] x n in [1, 8] sub-tile,
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester();
        // Kernel tile geometry.
        tester.mr(2).nr(8).kr(4).sr(1);
        // Problem shape for this iteration.
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10938
// Sweeps k over multiples of 16 from 32 through 160 on the full 2x8 tile.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    auto tester = GemmMicrokernelTester();
    // Kernel tile geometry.
    tester.mr(2).nr(8).kr(4).sr(1);
    // Problem shape for this iteration.
    tester.m(2).n(8).k(k);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
10953
// Sweeps k over multiples of 16 from 32 through 160 and every
// m in [1, 2] x n in [1, 8] sub-tile, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester();
        // Kernel tile geometry.
        tester.mr(2).nr(8).kr(4).sr(1);
        // Problem shape for this iteration.
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
10973
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester();
      // Kernel tile geometry.
      tester.mr(2).nr(8).kr(4).sr(1);
      // Problem shape for this iteration.
      tester.m(2).n(n).k(k);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
10990
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m = 2 and
// cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester();
      // Kernel tile geometry.
      tester.mr(2).nr(8).kr(4).sr(1);
      // Problem shape for this iteration.
      tester.m(2).n(n).k(k).cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
11008
// Sweeps n over [9, 15], k over {1, 18, 35, 52, 69} and m over [1, 2],
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester();
        // Kernel tile geometry.
        tester.mr(2).nr(8).kr(4).sr(1);
        // Problem shape for this iteration.
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11028
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester();
      // Kernel tile geometry.
      tester.mr(2).nr(8).kr(4).sr(1);
      // Problem shape for this iteration.
      tester.m(2).n(n).k(k);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
11045
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2 and
// cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester();
      // Kernel tile geometry.
      tester.mr(2).nr(8).kr(4).sr(1);
      // Problem shape for this iteration.
      tester.m(2).n(n).k(k).cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
11063
// Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over [1, 2],
// one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester();
        // Kernel tile geometry.
        tester.mr(2).nr(8).kr(4).sr(1);
        // Problem shape for this iteration.
        tester.m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11083
// Sweeps k over {1, 18, 35, 52, 69} on the full 2x8 tile with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    auto tester = GemmMicrokernelTester();
    // Kernel tile geometry.
    tester.mr(2).nr(8).kr(4).sr(1);
    // Problem shape for this iteration.
    tester.m(2).n(8).k(k).ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
11099
// Sweeps k over {1, 18, 35, 52, 69} and every m in [1, 2] x n in [1, 8]
// sub-tile with ks(3), running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester();
        // Kernel tile geometry.
        tester.mr(2).nr(8).kr(4).sr(1);
        // Problem shape for this iteration.
        tester.m(m).n(n).k(k).ks(3).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11120
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m = 2 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester();
      // Kernel tile geometry.
      tester.mr(2).nr(8).kr(4).sr(1);
      // Problem shape for this iteration.
      tester.m(2).n(n).k(k).ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
11138
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester();
      // Kernel tile geometry.
      tester.mr(2).nr(8).kr(4).sr(1);
      // Problem shape for this iteration.
      tester.m(2).n(n).k(k).ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
11156
// Sweeps k over {1, 18, 35, 52, 69} and every m in [1, 2] x n in [1, 8]
// sub-tile with cm_stride(11), running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester();
        // Kernel tile geometry.
        tester.mr(2).nr(8).kr(4).sr(1);
        // Problem shape for this iteration.
        tester.m(m).n(n).k(k).cm_stride(11).iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
11177
// Sweeps k over {1, 18, 35, 52, 69} on the full 2x8 tile with ks(3) and
// a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    auto tester = GemmMicrokernelTester();
    // Kernel tile geometry.
    tester.mr(2).nr(8).kr(4).sr(1);
    // Problem shape for this iteration.
    tester.m(2).n(8).k(k).ks(3).a_offset(163);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
11194
// Sweeps k over {1, 18, 35, 52, 69} and zero_index over {0, 1} on the full
// 2x8 tile with ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      auto tester = GemmMicrokernelTester();
      // Kernel tile geometry.
      tester.mr(2).nr(8).kr(4).sr(1);
      // Problem shape for this iteration.
      tester.m(2).n(8).k(k).ks(3).a_offset(163).zero_index(mz);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
11214
// Full 2x8 tile at k = 16 with qmin(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  // Kernel tile geometry.
  tester.mr(2).nr(8).kr(4).sr(1);
  // Problem shape and output clamp setting.
  tester.m(2).n(8).k(16).qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
11228
// Full 2x8 tile at k = 16 with qmax(128) set on the tester.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  // Kernel tile geometry.
  tester.mr(2).nr(8).kr(4).sr(1);
  // Problem shape and output clamp setting.
  tester.m(2).n(8).k(16).qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
11242
// Full 2x8 tile at k = 16 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester();
  // Kernel tile geometry.
  tester.mr(2).nr(8).kr(4).sr(1);
  // Problem shape and output row stride.
  tester.m(2).n(8).k(16).cm_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
11256 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11257
11258
11259 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 2x8c4s2 NEONv8 MLAL kernel at m=2, n=8, k=16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11272
// Single run at m=2, n=8, k=16 with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11286
// k fixed at 16; iterates n over 1..8 (outer) and m over 1..2 (inner),
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11304
// n=8 and k=16 fixed; iterates m over 1..2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11320
// m=2 and k=16 fixed; iterates n over 1..8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(n_size).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11336
// m=2, n=8 fixed; iterates k over 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11351
// Iterates k over 1..15, n over 1..8 and m over 1..2 (k outermost),
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11371
// m=2, n=8 fixed; iterates k over 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11386
// Iterates k over 17..31, n over 1..8 and m over 1..2 (k outermost),
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11406
// m=2, n=8 fixed; iterates k over 32, 48, ..., 160 (step 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11421
// Iterates k over 32..160 (step 16), n over 1..8 and m over 1..2
// (k outermost), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11441
// m=2 fixed; iterates n over 9..15 (outer) and k over {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11458
// m=2 and cn_stride=11 fixed; iterates n over 9..15 (outer) and
// k over {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11476
// Iterates n over 9..15, k over {1, 18, 35, 52, 69} and m over 1..2
// (n outermost), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11496
// m=2 fixed; iterates n over {16, 24} (outer) and k over {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11513
// m=2 and cn_stride=11 fixed; iterates n over {16, 24} (outer) and
// k over {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11531
// Iterates n over {16, 24}, k over {1, 18, 35, 52, 69} and m over 1..2
// (n outermost), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11551
// m=2, n=8 and ks=3 fixed; iterates k over {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11567
// ks=3 fixed; iterates k over {1, 18, 35, 52, 69}, n over 1..8 and
// m over 1..2 (k outermost), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11588
// m=2 and ks=3 fixed; iterates n over 9..15 (outer) and
// k over {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11606
// m=2 and ks=3 fixed; iterates n over {16, 24} (outer) and
// k over {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11624
// cm_stride=11 fixed; iterates k over {1, 18, 35, 52, 69}, n over 1..8
// and m over 1..2 (k outermost), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11645
// m=2, n=8, ks=3 and a_offset=163 fixed; iterates k over {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_size)
      .ks(3).a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11662
// For k in {1, 18, 35, 52, 69} and zero_index in {0, 1}: runs the kernel
// with m=2, n=8, ks=3 and a_offset=163.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(8).k(k_size)
        .ks(3).a_offset(163).zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11682
// Single run at m=2, n=8, k=16 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11696
// Single run at m=2, n=8, k=16 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11710
// Single run at m=2, n=8, k=16 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11724 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11725
11726
11727 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 2x8c8 NEONv8 MLAL kernel at m=2, n=8, k=16.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11740
// Single run at m=2, n=8, k=16 with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11754
// k fixed at 16; iterates n over 1..8 (outer) and m over 1..2 (inner),
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(m_size).n(n_size).k(16)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11772
// n=8 and k=16 fixed; iterates m over 1..2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(m_size).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11788
// m=2 and k=16 fixed; iterates n over 1..8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(n_size).k(16)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11804
// m=2, n=8 fixed; iterates k over 1..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11819
// Iterates k over 1..15, n over 1..8 and m over 1..2 (k outermost),
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11839
// m=2, n=8 fixed; iterates k over 17..31.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11854
// Iterates k over 17..31, n over 1..8 and m over 1..2 (k outermost),
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11874
// m=2, n=8 fixed; iterates k over 32, 48, ..., 160 (step 16).
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11889
// Iterates k over 32..160 (step 16), n over 1..8 and m over 1..2
// (k outermost), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11909
// m=2 fixed; iterates n over 9..15 (outer) and k over {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11926
// m=2 and cn_stride=11 fixed; iterates n over 9..15 (outer) and
// k over {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11944
// Iterates n over 9..15, k over {1, 18, 35, 52, 69} and m over 1..2
// (n outermost), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11964
// m=2 fixed; iterates n over {16, 24} (outer) and k over {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11981
// m=2 and cn_stride=11 fixed; iterates n over {16, 24} (outer) and
// k over {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11999
// Iterates n over {16, 24}, k over {1, 18, 35, 52, 69} and m over 1..2
// (n outermost), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12019
// m=2, n=8 and ks=3 fixed; iterates k over {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12035
// ks=3 fixed; iterates k over {1, 18, 35, 52, 69}, n over 1..8 and
// m over 1..2 (k outermost), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12056
// m=2 and ks=3 fixed; iterates n over 9..15 (outer) and
// k over {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12074
// m=2 and ks=3 fixed; iterates n over {16, 24} (outer) and
// k over {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12092
// cm_stride=11 fixed; iterates k over {1, 18, 35, 52, 69}, n over 1..8
// and m over 1..2 (k outermost), one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
12113
// m=2, n=8, ks=3 and a_offset=163 fixed; iterates k over {1, 18, 35, 52, 69}.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(8).sr(1)
      .m(2).n(8).k(k_size)
      .ks(3).a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
12130
// For k in {1, 18, 35, 52, 69} and zero_index in {0, 1}: runs the kernel
// with m=2, n=8, ks=3 and a_offset=163.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(8).sr(1)
        .m(2).n(8).k(k_size)
        .ks(3).a_offset(163).zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12150
// Single run at m=2, n=8, k=16 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12164
// Single run at m=2, n=8, k=16 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12178
// Single run at m=2, n=8, k=16 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(8).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
12192 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12193
12194
12195 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 2x16 NEON MLAL-lane PRFM kernel at m=2, n=16, k=8.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
12208
// Single run at m=2, n=16, k=8 with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(1).sr(1)
    .m(2).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
12222
// k fixed at 8; iterates n over 1..16 (outer) and m over 1..2 (inner),
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
12240
// n=16 and k=8 fixed; iterates m over 1..2, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(m_size).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
12256
// m=2 and k=8 fixed; iterates n over 1..16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
12272
// Full 2x16 tile; sweeps k over [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
12287
// Sweeps k over [1, 7] combined with every sub-tile size (n in [1, 16],
// m in [1, 2]); iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12307
// Full 2x16 tile; sweeps k over [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
12322
// Sweeps k over [9, 15] combined with every sub-tile size (n in [1, 16],
// m in [1, 2]); iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12342
// Full 2x16 tile; sweeps k over the multiples of 8 from 16 to 80.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
12357
// Sweeps k over the multiples of 8 from 16 to 80 combined with every sub-tile
// size (n in [1, 16], m in [1, 2]); iterations is set to 1 per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12377
// m = 2; sweeps n over [17, 31] and k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12394
// m = 2 with cn_stride set to 19; sweeps n over [17, 31] and k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12412
// Sweeps n over [17, 31], k over {1, 10, 19, 28, 37} and m over [1, 2];
// iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12432
// m = 2; runs n = 32 and n = 48 with k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12449
// m = 2 with cn_stride set to 19; runs n = 32 and n = 48 with k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12467
// Runs n = 32 and n = 48 with k over {1, 10, 19, 28, 37} and m over [1, 2];
// iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12487
// Full 2x16 tile with ks set to 3; sweeps k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
12503
// ks set to 3; sweeps k over {1, 10, 19, 28, 37} combined with every sub-tile
// size (n in [1, 16], m in [1, 2]); iterations is set to 1 per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12524
// m = 2 with ks set to 3; sweeps n over [17, 31] and k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12542
// m = 2 with ks set to 3; runs n = 32 and n = 48 with k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12560
// cm_stride set to 19; sweeps k over {1, 10, 19, 28, 37} combined with every
// sub-tile size (n in [1, 16], m in [1, 2]); iterations is set to 1 per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12581
// Full 2x16 tile with ks set to 3 and a_offset set to 83; sweeps k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
12598
// Full 2x16 tile with ks set to 3 and a_offset set to 83; sweeps k over
// {1, 10, 19, 28, 37} and zero_index over each row index mz in [0, 1].
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
12618
// Full 2x16 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(2)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
12632
// Full 2x16 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(2)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
12646
// Full 2x16 tile at k = 8 with cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(2)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
12660 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12661
12662
12663 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 2x16 tile (m = 2, n = 16) at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(2)
    .n(16)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
12676
// Full 2x16 tile (m = 2, n = 16) at k = 8 with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(2)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
12690
// k = 8; sweeps every output sub-tile size with n in [1, 16] and m in [1, 2],
// with iterations set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
12708
// k = 8 and n = 16; sweeps m over [1, 2] with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12724
// k = 8 and m = 2; sweeps n over [1, 16] with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12740
// Full 2x16 tile; sweeps k over [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12755
// Sweeps k over [1, 7] combined with every sub-tile size (n in [1, 16],
// m in [1, 2]); iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12775
// Full 2x16 tile; sweeps k over [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12790
// Sweeps k over [9, 15] combined with every sub-tile size (n in [1, 16],
// m in [1, 2]); iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12810
// Full 2x16 tile; sweeps k over the multiples of 8 from 16 to 80.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12825
// Sweeps k over the multiples of 8 from 16 to 80 combined with every sub-tile
// size (n in [1, 16], m in [1, 2]); iterations is set to 1 per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12845
// m = 2; sweeps n over [17, 31] and k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
12862
// m = 2 with cn_stride set to 19; sweeps n over [17, 31] and k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
12880
// Sweeps n over [17, 31], k over {1, 10, 19, 28, 37} and m over [1, 2];
// iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12900
// m = 2; runs n = 32 and n = 48 with k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
12917
// m = 2 with cn_stride set to 19; runs n = 32 and n = 48 with k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
12935
// Runs n = 32 and n = 48 with k over {1, 10, 19, 28, 37} and m over [1, 2];
// iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12955
// Full 2x16 tile with ks set to 3; sweeps k over {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
12971
// ks set to 3; sweeps k over {1, 10, 19, 28, 37} combined with every sub-tile
// size (n in [1, 16], m in [1, 2]); iterations is set to 1 per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
12992
// m = 2 with ks set to 3; sweeps n over [17, 31] and k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13010
// m = 2 with ks set to 3; runs n = 32 and n = 48 with k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13028
// cm_stride set to 19; sweeps k over {1, 10, 19, 28, 37} combined with every
// sub-tile size (n in [1, 16], m in [1, 2]); iterations is set to 1 per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13049
// Full 2x16 tile with ks set to 3 and a_offset set to 83; sweeps k over
// {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
13066
// Full 2x16 tile with ks set to 3 and a_offset set to 83; sweeps k over
// {1, 10, 19, 28, 37} and zero_index over each row index mz in [0, 1].
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(2)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13086
// Full 2x16 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(2)
    .n(16)
    .k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13100
// Full 2x16 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(2)
    .n(16)
    .k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13114
// Full 2x16 tile at k = 8 with cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(2)
    .n(16)
    .k(8)
    .cm_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13128 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13129
13130
13131 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 2x16 tile (m = 2, n = 16) at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(2)
    .n(16)
    .k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13144
// Full 2x16 tile (m = 2, n = 16) at k = 8 with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2)
    .nr(16)
    .kr(1)
    .sr(1)
    .m(2)
    .n(16)
    .k(8)
    .cn_stride(19)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
13158
// k = 8; sweeps every output sub-tile size with n in [1, 16] and m in [1, 2],
// with iterations set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
13176
// k = 8 and n = 16; sweeps m over [1, 2] with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(m)
      .n(16)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
13192
// k = 8 and m = 2; sweeps n over [1, 16] with iterations set to 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
13208
// Full 2x16 tile; sweeps k over [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
13223
// Sweeps k over [1, 7] combined with every sub-tile size (n in [1, 16],
// m in [1, 2]); iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13243
// Full 2x16 tile; sweeps k over [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
13258
// Sweeps k over [9, 15] combined with every sub-tile size (n in [1, 16],
// m in [1, 2]); iterations is set to 1 for each configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
13278
// Runs the full 2x16 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(16).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
13293
// Sweeps k = 16, 24, ..., 80 against every m in [1, 2] and n in [1, 16],
// running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13313
// Sweeps n in (16, 32) with m = 2, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13330
// Sweeps n in (16, 32) with m = 2 and cn_stride(19), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13348
// Sweeps n in (16, 32), k = 1, 10, 19, 28, 37 and every m in [1, 2], running a
// single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13368
// Runs n = 32 and n = 48 with m = 2, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13385
// Runs n = 32 and n = 48 with m = 2 and cn_stride(19), for
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13403
// Runs n = 32 and n = 48, k = 1, 10, 19, 28, 37 and every m in [1, 2], with a
// single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13423
// Runs the full 2x16 tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(16).k(k_val)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
13439
// Sweeps k = 1, 10, 19, 28, 37 against every m in [1, 2] and n in [1, 16] with
// ks(3), running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13460
// Sweeps n in (16, 32) with m = 2 and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13478
// Runs n = 32 and n = 48 with m = 2 and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13496
// Sweeps k = 1, 10, 19, 28, 37 against every m in [1, 2] and n in [1, 16] with
// cm_stride(19), running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13517
// Runs the full 2x16 tile with ks(3) and a_offset(83), for
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(1).sr(1)
        .m(2).n(16).k(k_val)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
13534
// Runs the full 2x16 tile with ks(3) and a_offset(83), trying zero_index 0 and
// 1 for each k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(1).sr(1)
          .m(2).n(16).k(k_val)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13554
// Runs the full 2x16 tile at k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
13568
// Runs the full 2x16 tile at k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
13582
// Runs the full 2x16 tile at k = 8 with cm_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_2X16__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(16).kr(1).sr(1)
      .m(2).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
13596 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13597
13598
13599 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the full 3x8 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
13612
// Runs the full 3x8 tile at k = 8 with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
13626
// Sweeps every m in [1, 3] and n in [1, 8] at k = 8, running a single
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13644
// Sweeps m in [1, 3] with n = 8 and k = 8, running a single iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(m_val).n(8).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
13660
// Sweeps n in [1, 8] with m = 3 and k = 8, running a single iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
13676
// Runs the full 3x8 tile for each k in [1, 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
13691
// Sweeps k in [1, 8) against every m in [1, 3] and n in [1, 8], running a
// single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13711
// Runs the full 3x8 tile for each k in (8, 16).
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
13726
// Sweeps k in (8, 16) against every m in [1, 3] and n in [1, 8], running a
// single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13746
// Runs the full 3x8 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
13761
// Sweeps k = 16, 24, ..., 80 against every m in [1, 3] and n in [1, 8],
// running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13781
// Sweeps n in (8, 16) with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_val).k(k_val)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13798
// Sweeps n in (8, 16) with m = 3 and cn_stride(11), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13816
// Sweeps n in (8, 16), k = 1, 10, 19, 28, 37 and every m in [1, 3], running a
// single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13836
// Runs n = 16 and n = 24 with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_val).k(k_val)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13853
// Runs n = 16 and n = 24 with m = 3 and cn_stride(11), for
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13871
// Runs n = 16 and n = 24, k = 1, 10, 19, 28, 37 and every m in [1, 3], with a
// single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13891
// Runs the full 3x8 tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k_val)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
13907
// Sweeps k = 1, 10, 19, 28, 37 against every m in [1, 3] and n in [1, 8] with
// ks(3), running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13928
// Sweeps n in (8, 16) with m = 3 and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13946
// Runs n = 16 and n = 24 with m = 3 and ks(3), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
13964
// Sweeps k = 1, 10, 19, 28, 37 against every m in [1, 3] and n in [1, 8] with
// cm_stride(11), running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(8).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
13985
// Runs the full 3x8 tile with ks(3) and a_offset(127), for
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(k_val)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
14002
// Runs the full 3x8 tile with ks(3) and a_offset(127), trying zero_index 0, 1
// and 2 for each k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(3).n(8).k(k_val)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
14022
// Runs the full 3x8 tile at k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
14036
// Runs the full 3x8 tile at k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
14050
// Runs the full 3x8 tile at k = 8 with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_3X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
14064 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14065
14066
14067 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the full 3x16 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
14080
// Runs the full 3x16 tile at k = 8 with cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
14094
// Sweeps every m in [1, 3] and n in [1, 16] at k = 8, running a single
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
14112
// Sweeps m in [1, 3] with n = 16 and k = 8, running a single iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(m_val).n(16).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
14128
// Sweeps n in [1, 16] with m = 3 and k = 8, running a single iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
14144
// Runs the full 3x16 tile for each k in [1, 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(16).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
14159
// Sweeps k in [1, 8) against every m in [1, 3] and n in [1, 16], running a
// single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
14179
// Runs the full 3x16 tile for each k in (8, 16).
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(16).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
14194
// Sweeps k in (8, 16) against every m in [1, 3] and n in [1, 16], running a
// single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
14214
// Runs the full 3x16 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(16).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
14229
// Sweeps k = 16, 24, ..., 80 against every m in [1, 3] and n in [1, 16],
// running a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
14249
// Sweeps n in (16, 32) with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n_val).k(k_val)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
14266
// Sweeps n in (16, 32) with m = 3 and cn_stride(19), for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n_val).k(k_val)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
14284
// Sweeps n in (16, 32), k = 1, 10, 19, 28, 37 and every m in [1, 3], running a
// single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 17; n_val < 32; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(1).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
14304
// Runs m = 3 with n in {32, 48} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14321
// Same sweep as n_div_16 (n in {32, 48}, k in {1, 10, 19, 28, 37}) with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.cn_stride(19);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14339
// Runs n in {32, 48}, k in {1, 10, 19, 28, 37} and every m in 1..3, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14359
// Runs the full 3x16 tile with ks(3) for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(depth);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14375
// Runs every m x n sub-tile (m in 1..3, n in 1..16) with ks(3) for k in {1, 10, 19, 28, 37},
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14396
// Runs m = 3 with ks(3), n in 17..31 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14414
// Runs m = 3 with ks(3), n in {32, 48} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14432
// Runs every m x n sub-tile (m in 1..3, n in 1..16) with cm_stride set to 19 for
// k in {1, 10, 19, 28, 37}, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(16).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(19);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14453
// Runs the full 3x16 tile with ks(3) and a_offset(127) for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(16).kr(1).sr(1);
    tester.m(3).n(16).k(depth);
    tester.ks(3);
    tester.a_offset(127);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
14470
// Runs the full 3x16 tile with ks(3) and a_offset(127), passing each index 0..2 to
// zero_index, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(16).kr(1).sr(1);
      tester.m(3).n(16).k(depth);
      tester.ks(3);
      tester.a_offset(127);
      tester.zero_index(zero_row);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
14490
// Runs the full 3x16 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14504
// Runs the full 3x16 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14518
// Runs the full 3x16 tile at k = 8 with cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_3X16__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(16).kr(1).sr(1);
  tester.m(3).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x16__neonv8_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
14532 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14533
14534
14535 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the full 4x8 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
14548
// Runs the full 4x8 tile at k = 8 with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
14562
// Runs every m x n sub-tile (m in 1..4, n in 1..8) at k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
14580
// Runs m in 1..4 with n = 8 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(rows).n(8).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
14596
// Runs n in 1..8 with m = 4 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(cols).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
14612
// Runs the full 4x8 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
14627
// Runs every m x n sub-tile (m in 1..4, n in 1..8) for every k in 1..7, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14647
// Runs the full 4x8 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
14662
// Runs every m x n sub-tile (m in 1..4, n in 1..8) for every k in 9..15, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14682
// Runs the full 4x8 tile for k in 16..80 stepping by 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
14697
// Runs every m x n sub-tile (m in 1..4, n in 1..8) for k in 16..80 stepping by 8,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14717
// Runs m = 4 with n in 9..15 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
14734
// Same sweep as n_gt_8 (n in 9..15, k in {1, 10, 19, 28, 37}) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
14752
// Runs n in 9..15, k in {1, 10, 19, 28, 37} and every m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14772
// Runs m = 4 with n in {16, 24} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
14789
// Same sweep as n_div_8 (n in {16, 24}, k in {1, 10, 19, 28, 37}) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
14807
// Runs n in {16, 24}, k in {1, 10, 19, 28, 37} and every m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14827
// Runs the full 4x8 tile with ks(3) for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
14843
// Runs every m x n sub-tile (m in 1..4, n in 1..8) with ks(3) for k in {1, 10, 19, 28, 37},
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14864
// Runs m = 4 with ks(3), n in 9..15 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
14882
// Runs m = 4 with ks(3), n in {16, 24} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
14900
// Runs every m x n sub-tile (m in 1..4, n in 1..8) with cm_stride set to 11 for
// k in {1, 10, 19, 28, 37}, one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(1).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
14921
// Runs the full 4x8 tile with ks(3) and a_offset(163) for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(1).sr(1);
    tester.m(4).n(8).k(depth);
    tester.ks(3);
    tester.a_offset(163);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
14938
// Runs the full 4x8 tile with ks(3) and a_offset(163), passing each index 0..3 to
// zero_index, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(1).sr(1);
      tester.m(4).n(8).k(depth);
      tester.ks(3);
      tester.a_offset(163);
      tester.zero_index(zero_row);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
14958
// Runs the full 4x8 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmin(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
14972
// Runs the full 4x8 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.qmax(128);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
14986
// Runs the full 4x8 tile at k = 8 with cm_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(1).sr(1);
  tester.m(4).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qc8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
15000 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15001
15002
15003 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Runs the full 4x8 tile (kr = 4) at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(4).sr(1);
  tester.m(4).n(8).k(8);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15016
// Runs the full 4x8 tile (kr = 4) at k = 8 with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(8).kr(4).sr(1);
  tester.m(4).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
15030
// Runs every m x n sub-tile (m in 1..4, n in 1..8) at k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15048
// Runs m in 1..4 with n = 8 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(rows).n(8).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15064
// Runs n in 1..8 with m = 4 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(cols).k(8);
    tester.iterations(1);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15080
// Runs the full 4x8 tile (kr = 4) for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15095
// Runs every m x n sub-tile (m in 1..4, n in 1..8) for every k in 1..7, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15115
// Runs the full 4x8 tile (kr = 4) for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15130
// Runs every m x n sub-tile (m in 1..4, n in 1..8) for every k in 9..15, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15150
// Runs the full 4x8 tile (kr = 4) for k in 16..80 stepping by 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(depth);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15165
// Runs every m x n sub-tile (m in 1..4, n in 1..8) for k in 16..80 stepping by 8,
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15185
// Runs m = 4 with n in 9..15 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15202
// Same sweep as n_gt_8 (n in 9..15, k in {1, 10, 19, 28, 37}) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15220
// Runs n in 9..15, k in {1, 10, 19, 28, 37} and every m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15240
// Runs m = 4 with n in {16, 24} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15257
// Same sweep as n_div_8 (n in {16, 24}, k in {1, 10, 19, 28, 37}) with cn_stride set to 11.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(8).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
15275
// Runs n in {16, 24}, k in {1, 10, 19, 28, 37} and every m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
15295
// Runs the full 4x8 tile (kr = 4) with ks(3) for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(8).kr(4).sr(1);
    tester.m(4).n(8).k(depth);
    tester.ks(3);
    tester.Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot, xnn_init_qc8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
15311
// 4x8c4 NEONDOT: k in {1, 10, 19, 28, 37}, n in 1..8, m in 1..4, with ks(3), one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
15332
// 4x8c4 NEONDOT: n in 9..15, k in {1, 10, 19, 28, 37}, m(4), with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
15350
// 4x8c4 NEONDOT: n in {16, 24}, k in {1, 10, 19, 28, 37}, m(4), with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
15368
// 4x8c4 NEONDOT: k in {1, 10, 19, 28, 37}, n in 1..8, m in 1..4, with cm_stride(11), one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
15389
// 4x8c4 NEONDOT: m(4), n(8), k in {1, 10, 19, 28, 37}, with ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(4).sr(1)
        .m(4).n(8).k(k_dim)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
15406
// 4x8c4 NEONDOT: k in {1, 10, 19, 28, 37}, zero_index in 0..3, with ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(4).sr(1)
          .m(4).n(8).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
15426
// 4x8c4 NEONDOT: single m(4) x n(8) x k(8) case with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
15440
// 4x8c4 NEONDOT: single m(4) x n(8) x k(8) case with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
15454
// 4x8c4 NEONDOT: single m(4) x n(8) x k(8) case with cm_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_4X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(4).sr(1)
      .m(4).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
15468 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
15469
15470
15471 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 4x16 NEON MLAL-lane: single m(4) x n(16) x k(8) case.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
15484
// 4x16 NEON MLAL-lane: single m(4) x n(16) x k(8) case with cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
15498
// 4x16 NEON MLAL-lane: k(8), n in 1..16, m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
15516
// 4x16 NEON MLAL-lane: k(8), n(16), m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(m_dim).n(16).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
15532
// 4x16 NEON MLAL-lane: k(8), m(4), n in 1..16, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
15548
// 4x16 NEON MLAL-lane: m(4), n(16), k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
15563
// 4x16 NEON MLAL-lane: k in 1..7, n in 1..16, m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
15583
// 4x16 NEON MLAL-lane: m(4), n(16), k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
15598
// 4x16 NEON MLAL-lane: k in 9..15, n in 1..16, m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
15618
// 4x16 NEON MLAL-lane: m(4), n(16), k in 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
15633
// 4x16 NEON MLAL-lane: k in 16, 24, ..., 80, n in 1..16, m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
15653
// 4x16 NEON MLAL-lane: n in 17..31, k in {1, 10, 19, 28, 37}, m(4).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
15670
// 4x16 NEON MLAL-lane: n in 17..31, k in {1, 10, 19, 28, 37}, m(4), with cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
15688
// 4x16 NEON MLAL-lane: n in 17..31, k in {1, 10, 19, 28, 37}, m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
15708
// 4x16 NEON MLAL-lane: n in {32, 48}, k in {1, 10, 19, 28, 37}, m(4).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
15725
// 4x16 NEON MLAL-lane: n in {32, 48}, k in {1, 10, 19, 28, 37}, m(4), with cn_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
15743
// 4x16 NEON MLAL-lane: n in {32, 48}, k in {1, 10, 19, 28, 37}, m in 1..4, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
15763
// 4x16 NEON MLAL-lane: m(4), n(16), k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_dim)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
15779
// 4x16 NEON MLAL-lane: k in {1, 10, 19, 28, 37}, n in 1..16, m in 1..4, with ks(3), one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
15800
// 4x16 NEON MLAL-lane: n in 17..31, k in {1, 10, 19, 28, 37}, m(4), with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
15818
// 4x16 NEON MLAL-lane: n in {32, 48}, k in {1, 10, 19, 28, 37}, m(4), with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
15836
// 4x16 NEON MLAL-lane: k in {1, 10, 19, 28, 37}, n in 1..16, m in 1..4, with cm_stride(19), one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
15857
// 4x16 NEON MLAL-lane: m(4), n(16), k in {1, 10, 19, 28, 37}, with ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_dim)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
15874
// 4x16 NEON MLAL-lane: k in {1, 10, 19, 28, 37}, zero_index in 0..3, with ks(3) and a_offset(163).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(16).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
15894
// 4x16 NEON MLAL-lane: single m(4) x n(16) x k(8) case with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
15908
// 4x16 NEON MLAL-lane: single m(4) x n(16) x k(8) case with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
15922
// 4x16 NEON MLAL-lane: single m(4) x n(16) x k(8) case with cm_stride(19).
TEST(QC8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
15936 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15937
15938
15939 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 6x8 NEON MLAL-lane: single m(6) x n(8) x k(8) case.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
15952
// 6x8 NEON MLAL-lane: single m(6) x n(8) x k(8) case with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
15966
// 6x8 NEON MLAL-lane: k(8), n in 1..8, m in 1..6, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
15984
// 6x8 NEON MLAL-lane: k(8), n(8), m in 1..6, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(8).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
16000
// 6x8 NEON MLAL-lane: k(8), m(6), n in 1..8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
16016
// 6x8 NEON MLAL-lane: m(6), n(8), k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
16031
// 6x8 NEON MLAL-lane: k in 1..7, n in 1..8, m in 1..6, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
16051
// 6x8 NEON MLAL-lane: m(6), n(8), k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
16066
// 6x8 NEON MLAL-lane: k in 9..15, n in 1..8, m in 1..6, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
16086
// 6x8 NEON MLAL-lane: m(6), n(8), k in 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
16101
// 6x8 NEON MLAL-lane: k in 16, 24, ..., 80, n in 1..8, m in 1..6, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
16121
// 6x8 NEON MLAL-lane: n in 9..15, k in {1, 10, 19, 28, 37}, m(6).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
16138
// 6x8 NEON MLAL-lane: n in 9..15, k in {1, 10, 19, 28, 37}, m(6), with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
16156
// 6x8 NEON MLAL-lane: n in 9..15, k in {1, 10, 19, 28, 37}, m in 1..6, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
16176
// 6x8 NEON MLAL-lane: n in {16, 24}, k in {1, 10, 19, 28, 37}, m(6).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
16193
// 6x8 NEON MLAL-lane: n in {16, 24}, k in {1, 10, 19, 28, 37}, m(6), with cn_stride(11).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(11)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
16211
// 6x8 NEON MLAL-lane: n in {16, 24}, k in {1, 10, 19, 28, 37}, m in 1..6, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
16231
// 6x8 NEON MLAL-lane: m(6), n(8), k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
16247
// 6x8 NEON MLAL-lane: k in {1, 10, 19, 28, 37}, n in 1..8, m in 1..6, with ks(3), one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
16268
// 6x8 NEON MLAL-lane: n in 9..15, k in {1, 10, 19, 28, 37}, m(6), with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
16286
// 6x8 NEON MLAL-lane: n in {16, 24}, k in {1, 10, 19, 28, 37}, m(6), with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
16304
// 6x8 NEON MLAL-lane: k in {1, 10, 19, 28, 37}, n in 1..8, m in 1..6, with cm_stride(11), one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
                  xnn_init_qc8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
16325
// 6x8 NEON MLAL-lane: m(6), n(8), k in {1, 10, 19, 28, 37}, with ks(3) and a_offset(251).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(k_dim)
        .ks(3)
        .a_offset(251)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
              xnn_init_qc8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
16342
// 6x8 NEON MLAL-lane: k in {1, 10, 19, 28, 37}, zero_index in 0..5, with ks(3) and a_offset(251).
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(k_dim)
          .ks(3)
          .a_offset(251)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
                xnn_init_qc8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
16362
// 6x8 NEON MLAL-lane kernel, single m = 6, n = 8, k = 8 run with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16376
// 6x8 NEON MLAL-lane kernel, single m = 6, n = 8, k = 8 run with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16390
// 6x8 NEON MLAL-lane kernel, single m = 6, n = 8, k = 8 run with cm_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_6X8__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(8)
    .cm_stride(11)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x8__neon_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16404 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16405
16406
16407 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 6x16 NEON MLAL-lane PRFM kernel, single run with m = mr = 6, n = nr = 16, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16420
// 6x16 NEON MLAL-lane PRFM kernel, single m = 6, n = 16, k = 8 run with cn_stride = 19.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16434
// 6x16 NEON MLAL-lane PRFM kernel at k = 8: every n in [1, 16] paired with every
// m in [1, 6]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16452
// 6x16 NEON MLAL-lane PRFM kernel at k = 8, n = 16: m varies over [1, 6];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(m_dim).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16468
// 6x16 NEON MLAL-lane PRFM kernel at k = 8, m = 6: n varies over [1, 16];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16484
// 6x16 NEON MLAL-lane PRFM kernel on an m = 6, n = 16 tile with k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16499
// 6x16 NEON MLAL-lane PRFM kernel with k in [1, 7]: every n in [1, 16] and
// m in [1, 6]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16519
// 6x16 NEON MLAL-lane PRFM kernel on an m = 6, n = 16 tile with k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16534
// 6x16 NEON MLAL-lane PRFM kernel with k in [9, 15]: every n in [1, 16] and
// m in [1, 6]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16554
// 6x16 NEON MLAL-lane PRFM kernel on an m = 6, n = 16 tile with k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16569
// 6x16 NEON MLAL-lane PRFM kernel with k = 16, 24, ..., 80: every n in [1, 16] and
// m in [1, 6]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16589
// 6x16 NEON MLAL-lane PRFM kernel with m = 6 and n in [17, 31], k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16606
// 6x16 NEON MLAL-lane PRFM kernel with cn_stride = 19, m = 6 and n in [17, 31],
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16624
// 6x16 NEON MLAL-lane PRFM kernel with n in [17, 31], k = 1, 10, 19, 28, 37 and
// every m in [1, 6]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16644
// 6x16 NEON MLAL-lane PRFM kernel with m = 6 and n = 32, 48 (multiples of nr = 16),
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16661
// 6x16 NEON MLAL-lane PRFM kernel with cn_stride = 19, m = 6 and n = 32, 48,
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16679
// 6x16 NEON MLAL-lane PRFM kernel with n = 32, 48, k = 1, 10, 19, 28, 37 and
// every m in [1, 6]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16699
// 6x16 NEON MLAL-lane PRFM kernel with ks = 3 on an m = 6, n = 16 tile,
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_dim)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16715
// 6x16 NEON MLAL-lane PRFM kernel with ks = 3: every n in [1, 16] and m in [1, 6]
// for k = 1, 10, 19, 28, 37; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16736
// 6x16 NEON MLAL-lane PRFM kernel with ks = 3, m = 6 and n in [17, 31],
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16754
// 6x16 NEON MLAL-lane PRFM kernel with ks = 3, m = 6 and n = 32, 48,
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16772
// 6x16 NEON MLAL-lane PRFM kernel with cm_stride = 19: every n in [1, 16] and
// m in [1, 6] for k = 1, 10, 19, 28, 37; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16793
// 6x16 NEON MLAL-lane PRFM kernel with ks = 3 and a_offset = 251 on an m = 6, n = 16
// tile, k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_dim)
      .ks(3)
      .a_offset(251)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
        xnn_init_qc8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
16810
// 6x16 NEON MLAL-lane PRFM kernel with ks = 3 and a_offset = 251, trying every
// zero_index in [0, 5] for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_dim)
        .ks(3)
        .a_offset(251)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
          xnn_init_qc8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16830
// 6x16 NEON MLAL-lane PRFM kernel, single m = 6, n = 16, k = 8 run with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16844
// 6x16 NEON MLAL-lane PRFM kernel, single m = 6, n = 16, k = 8 run with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16858
// 6x16 NEON MLAL-lane PRFM kernel, single m = 6, n = 16, k = 8 run with cm_stride = 19.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neon_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
16872 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16873
16874
16875 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 6x16 NEONv8 MLAL-lane kernel, single run with m = mr = 6, n = nr = 16, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16888
// 6x16 NEONv8 MLAL-lane kernel, single m = 6, n = 16, k = 8 run with cn_stride = 19.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
16902
// 6x16 NEONv8 MLAL-lane kernel at k = 8: every n in [1, 16] paired with every
// m in [1, 6]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16920
// 6x16 NEONv8 MLAL-lane kernel at k = 8, n = 16: m varies over [1, 6];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(m_dim).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16936
// 6x16 NEONv8 MLAL-lane kernel at k = 8, m = 6: n varies over [1, 16];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16952
// 6x16 NEONv8 MLAL-lane kernel on an m = 6, n = 16 tile with k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
16967
// 6x16 NEONv8 MLAL-lane kernel with k in [1, 7]: every n in [1, 16] and
// m in [1, 6]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16987
// 6x16 NEONv8 MLAL-lane kernel on an m = 6, n = 16 tile with k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17002
// 6x16 NEONv8 MLAL-lane kernel with k in [9, 15]: every n in [1, 16] and
// m in [1, 6]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17022
// 6x16 NEONv8 MLAL-lane kernel on an m = 6, n = 16 tile with k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_dim)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17037
// 6x16 NEONv8 MLAL-lane kernel with k = 16, 24, ..., 80: every n in [1, 16] and
// m in [1, 6]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17057
// 6x16 NEONv8 MLAL-lane kernel with m = 6 and n in [17, 31], k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17074
// 6x16 NEONv8 MLAL-lane kernel with cn_stride = 19, m = 6 and n in [17, 31],
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17092
// 6x16 NEONv8 MLAL-lane kernel with n in [17, 31], k = 1, 10, 19, 28, 37 and
// every m in [1, 6]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17112
// 6x16 NEONv8 MLAL-lane kernel with m = 6 and n = 32, 48 (multiples of nr = 16),
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17129
// 6x16 NEONv8 MLAL-lane kernel with cn_stride = 19, m = 6 and n = 32, 48,
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(19)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17147
// 6x16 NEONv8 MLAL-lane kernel with n = 32, 48, k = 1, 10, 19, 28, 37 and
// every m in [1, 6]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17167
// 6x16 NEONv8 MLAL-lane kernel with ks = 3 on an m = 6, n = 16 tile,
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_dim)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17183
// 6x16 NEONv8 MLAL-lane kernel with ks = 3: every n in [1, 16] and m in [1, 6]
// for k = 1, 10, 19, 28, 37; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17204
// 6x16 NEONv8 MLAL-lane kernel with ks = 3, m = 6 and n in [17, 31],
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17222
// 6x16 NEONv8 MLAL-lane kernel with ks = 3, m = 6 and n = 32, 48,
// k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17240
// 6x16 NEONv8 MLAL-lane kernel with cm_stride = 19: every n in [1, 16] and
// m in [1, 6] for k = 1, 10, 19, 28, 37; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17261
// 6x16 NEONv8 MLAL-lane kernel with ks = 3 and a_offset = 251 on an m = 6, n = 16
// tile, k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(k_dim)
      .ks(3)
      .a_offset(251)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
        xnn_init_qc8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
17278
// 6x16 NEONv8 MLAL-lane kernel with ks = 3 and a_offset = 251, trying every
// zero_index in [0, 5] for k = 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_dim)
        .ks(3)
        .a_offset(251)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
          xnn_init_qc8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17298
// 6x16 NEONv8 MLAL-lane kernel, single m = 6, n = 16, k = 8 run with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17312
// 6x16 NEONv8 MLAL-lane kernel, single m = 6, n = 16, k = 8 run with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17326
// 6x16 NEONv8 MLAL-lane kernel, single m = 6, n = 16, k = 8 run with cm_stride = 19.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17340 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
17341
17342
17343 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 6x16 NEONv8 MLAL-lane PRFM kernel, single run with m = mr = 6, n = nr = 16, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17356
// 6x16 NEONv8 MLAL-lane PRFM kernel, single m = 6, n = 16, k = 8 run with cn_stride = 19.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(6).nr(16).kr(1).sr(1)
    .m(6).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
      xnn_init_qc8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
17370
// Sweeps every (m, n) with 1 <= m <= 6 and 1 <= n <= 16 at k = 8,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17388
// Varies m over 1..6 while n stays at 16 and k at 8; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(m_dim).n(16).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
17404
// Varies n over 1..16 while m stays at 6 and k at 8; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
17420
// Full 6x16 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
17435
// For every k in 1..7, sweeps all (m, n) with m in 1..6 and n in 1..16,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17455
// Full 6x16 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
17470
// For every k in 9..15, sweeps all (m, n) with m in 1..6 and n in 1..16,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17490
// Full 6x16 tile for k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
17505
// For k = 16, 24, ..., 80, sweeps all (m, n) with m in 1..6 and n in 1..16,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17525
// m = 6 with n in 17..31, each combined with k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17542
// m = 6 with n in 17..31 and k = 1, 10, ..., 37, using cn_stride = 19.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17560
// n in 17..31, k = 1, 10, ..., 37 and m in 1..6; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17580
// m = 6 with n = 32 and 48, each combined with k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17597
// m = 6 with n = 32 and 48 and k = 1, 10, ..., 37, using cn_stride = 19.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17615
// n = 32 and 48, k = 1, 10, ..., 37 and m in 1..6; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17635
// Full 6x16 tile with ks = 3 for k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_dim)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
17651
// ks = 3 with k = 1, 10, ..., 37 and every (m, n) in 1..6 x 1..16;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17672
// ks = 3 with m = 6, n in 17..31 and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17690
// ks = 3 with m = 6, n = 32 and 48, and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17708
// cm_stride = 19 with k = 1, 10, ..., 37 and every (m, n) in 1..6 x 1..16;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 6; ++m_dim) {
        GemmMicrokernelTester()
            .mr(6).nr(16).kr(1).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17729
// Full 6x16 tile with ks = 3 and a_offset = 251 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(6).nr(16).kr(1).sr(1)
        .m(6).n(16).k(k_dim)
        .ks(3)
        .a_offset(251)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
17746
// Full 6x16 tile with ks = 3 and a_offset = 251; for each k = 1, 10, ..., 37
// the zero_index is stepped through 0..5.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 6; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(6).nr(16).kr(1).sr(1)
          .m(6).n(16).k(k_dim)
          .ks(3)
          .a_offset(251)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17766
// Full 6x16 tile at k = 8 with the tester's qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
17780
// Full 6x16 tile at k = 8 with the tester's qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
17794
// Full 6x16 tile at k = 8 with cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_6X16__NEONV8_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(6).nr(16).kr(1).sr(1)
      .m(6).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_6x16__neonv8_mlal_lane_prfm,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
17808 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
17809
17810
17811 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Single configuration: full 8x16 tile with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
17824
// Full 8x16 tile at k = 8 with cn_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(8)
      .cn_stride(19)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
17838
// Sweeps every (m, n) with 1 <= m <= 8 and 1 <= n <= 16 at k = 8,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
      GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
17856
// Varies m over 1..8 while n stays at 16 and k at 8; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
    GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(m_dim).n(16).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
17872
// Varies n over 1..16 while m stays at 8 and k at 8; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
17888
// Full 8x16 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(16).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
17903
// For every k in 1..7, sweeps all (m, n) with m in 1..8 and n in 1..16,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        GemmMicrokernelTester()
            .mr(8).nr(16).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17923
// Full 8x16 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(16).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
17938
// For every k in 9..15, sweeps all (m, n) with m in 1..8 and n in 1..16,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        GemmMicrokernelTester()
            .mr(8).nr(16).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17958
// Full 8x16 tile for k = 16, 24, ..., 80 (step 8).
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(16).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
17973
// For k = 16, 24, ..., 80, sweeps all (m, n) with m in 1..8 and n in 1..16,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        GemmMicrokernelTester()
            .mr(8).nr(16).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
17993
// m = 8 with n in 17..31, each combined with k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18010
// m = 8 with n in 17..31 and k = 1, 10, ..., 37, using cn_stride = 19.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18028
// n in 17..31, k = 1, 10, ..., 37 and m in 1..8; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        GemmMicrokernelTester()
            .mr(8).nr(16).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18048
// m = 8 with n = 32 and 48, each combined with k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18065
// m = 8 with n = 32 and 48 and k = 1, 10, ..., 37, using cn_stride = 19.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .cn_stride(19)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18083
// n = 32 and 48, k = 1, 10, ..., 37 and m in 1..8; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        GemmMicrokernelTester()
            .mr(8).nr(16).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18103
// Full 8x16 tile with ks = 3 for k = 1, 10, ..., 37 (step 9).
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(16).k(k_dim)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
18119
// ks = 3 with k = 1, 10, ..., 37 and every (m, n) in 1..8 x 1..16;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        GemmMicrokernelTester()
            .mr(8).nr(16).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18140
// ks = 3 with m = 8, n in 17..31 and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 17; n_dim < 32; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18158
// ks = 3 with m = 8, n = 32 and 48, and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n_dim = 32; n_dim <= 48; n_dim += 16) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(8).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18176
// cm_stride = 19 with k = 1, 10, ..., 37 and every (m, n) in 1..8 x 1..16;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 8; ++m_dim) {
        GemmMicrokernelTester()
            .mr(8).nr(16).kr(4).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
                  xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18197
// Full 8x16 tile with ks = 3 and a_offset = 331 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(16).k(k_dim)
        .ks(3)
        .a_offset(331)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
              xnn_init_qc8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
18214
// Full 8x16 tile with ks = 3 and a_offset = 331; for each k = 1, 10, ..., 37
// the zero_index is stepped through 0..7.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 8; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(8).n(16).k(k_dim)
          .ks(3)
          .a_offset(331)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
                xnn_init_qc8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18234
// Full 8x16 tile at k = 8 with the tester's qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
18248
// Full 8x16 tile at k = 8 with the tester's qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
18262
// Full 8x16 tile at k = 8 with cm_stride set to 19.
TEST(QC8_IGEMM_MINMAX_FP32_8X16C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(8)
      .cm_stride(19)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_8x16c4__neondot,
            xnn_init_qc8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
18276 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
18277
18278
18279 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: full 2x4 tile with k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
18292
// Full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
18306
// Sweeps every (m, n) with 1 <= m <= 2 and 1 <= n <= 4 at k = 8,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
18324
// Varies m over 1..2 while n stays at 4 and k at 8; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
18340
// Varies n over 1..4 while m stays at 2 and k at 8; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
18356
// Full 2x4 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
18371
// For every k in 1..7, sweeps all (m, n) with m in 1..2 and n in 1..4,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
18391
// m = 2, n = 4: sweeps k over [9, 15], i.e. every k strictly between 8 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(cur_k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
18406
// Sweeps k over [9, 15] and, for each k, every (n, m) with n in [1, 4] and m in [1, 2];
// each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18426
// m = 2, n = 4: sweeps k over the multiples of 8 from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(cur_k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
18441
// Sweeps k over the multiples of 8 from 16 through 80 and, for each k, every (n, m)
// with n in [1, 4] and m in [1, 2]; each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18461
// m = 2: sweeps n over [5, 7] and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18478
// m = 2, cn_stride(7): sweeps n over [5, 7] and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18496
// Sweeps n over [5, 7], k over 1, 10, 19, 28, 37 and m over [1, 2] (innermost);
// each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18516
// m = 2: sweeps n over 8 and 12 and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18533
// m = 2, cn_stride(7): sweeps n over 8 and 12 and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18551
// Sweeps n over 8 and 12, k over 1, 10, 19, 28, 37 and m over [1, 2] (innermost);
// each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18571
// m = 2, n = 4, ks(3): sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(cur_k)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
18587
// ks(3): sweeps k over 1, 10, 19, 28, 37 and, for each k, every (n, m) with
// n in [1, 4] and m in [1, 2]; each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18608
// m = 2, ks(3): sweeps n over [5, 7] and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18626
// m = 2, ks(3): sweeps n over 8 and 12 and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18644
// cm_stride(7): sweeps k over 1, 10, 19, 28, 37 and, for each k, every (n, m) with
// n in [1, 4] and m in [1, 2]; each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18665
// m = 2, n = 4, ks(3), a_offset(83): sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(cur_k)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
18682
// m = 2, n = 4, ks(3), a_offset(83): sweeps k over 1, 10, 19, 28, 37 and, for each k,
// passes zero_index values 0 and 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(cur_k)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18702
// Single configuration: m = 2, n = 4, k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
18716
// Single configuration: m = 2, n = 4, k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
18730
// Single configuration: m = 2, n = 4, k = 8 with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
18744 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18745
18746
18747 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: m = 2, n = 4, k = 8 with no further tester settings.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18760
// Single configuration: m = 2, n = 4, k = 8 with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18774
// k = 8: sweeps every (n, m) with n in [1, 4] and m in [1, 2] (m innermost);
// each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18792
// k = 8, n = 4: sweeps m over [1, 2], running each configuration with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(cur_m).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18808
// k = 8, m = 2: sweeps n over [1, 4], running each configuration with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(cur_n).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18824
// m = 2, n = 4: sweeps k over [1, 7], i.e. every k below 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(cur_k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18839
// Sweeps k over [1, 7] and, for each k, every (n, m) with n in [1, 4] and m in [1, 2];
// each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18859
// m = 2, n = 4: sweeps k over [9, 15], i.e. every k strictly between 8 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(cur_k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18874
// Sweeps k over [9, 15] and, for each k, every (n, m) with n in [1, 4] and m in [1, 2];
// each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18894
// m = 2, n = 4: sweeps k over the multiples of 8 from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(cur_k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18909
// Sweeps k over the multiples of 8 from 16 through 80 and, for each k, every (n, m)
// with n in [1, 4] and m in [1, 2]; each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18929
// m = 2: sweeps n over [5, 7] and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18946
// m = 2, cn_stride(7): sweeps n over [5, 7] and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18964
// Sweeps n over [5, 7], k over 1, 10, 19, 28, 37 and m over [1, 2] (innermost);
// each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18984
// m = 2: sweeps n over 8 and 12 and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19001
// m = 2, cn_stride(7): sweeps n over 8 and 12 and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19019
// Sweeps n over 8 and 12, k over 1, 10, 19, 28, 37 and m over [1, 2] (innermost);
// each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19039
// m = 2, n = 4, ks(3): sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(cur_k)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19055
// ks(3): sweeps k over 1, 10, 19, 28, 37 and, for each k, every (n, m) with
// n in [1, 4] and m in [1, 2]; each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19076
// m = 2, ks(3): sweeps n over [5, 7] and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19094
// m = 2, ks(3): sweeps n over 8 and 12 and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cur_n = 8; cur_n <= 12; cur_n += 4) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19112
// cm_stride(7): sweeps k over 1, 10, 19, 28, 37 and, for each k, every (n, m) with
// n in [1, 4] and m in [1, 2]; each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19133
// m = 2, n = 4, ks(3), a_offset(83): sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(cur_k)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19150
// m = 2, n = 4, ks(3), a_offset(83): sweeps k over 1, 10, 19, 28, 37 and, for each k,
// passes zero_index values 0 and 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(cur_k)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19170
// Single configuration: m = 2, n = 4, k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19184
// Single configuration: m = 2, n = 4, k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19198
// Single configuration: m = 2, n = 4, k = 8 with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19212 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19213
19214
19215 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: m = 2, n = 4, k = 8 with no further tester settings.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19228
// Single configuration: m = 2, n = 4, k = 8 with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19242
// k = 8: sweeps every (n, m) with n in [1, 4] and m in [1, 2] (m innermost);
// each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(cur_m).n(cur_n).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19260
// k = 8, n = 4: sweeps m over [1, 2], running each configuration with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(cur_m).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19276
// k = 8, m = 2: sweeps n over [1, 4], running each configuration with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(cur_n).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19292
// m = 2, n = 4: sweeps k over [1, 7], i.e. every k below 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(cur_k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19307
// Sweeps k over [1, 7] and, for each k, every (n, m) with n in [1, 4] and m in [1, 2];
// each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19327
// m = 2, n = 4: sweeps k over [9, 15], i.e. every k strictly between 8 and 16.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(cur_k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19342
// Sweeps k over [9, 15] and, for each k, every (n, m) with n in [1, 4] and m in [1, 2];
// each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 9; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19362
// m = 2, n = 4: sweeps k over the multiples of 8 from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(cur_k)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19377
// Sweeps k over the multiples of 8 from 16 through 80 and, for each k, every (n, m)
// with n in [1, 4] and m in [1, 2]; each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19397
// m = 2: sweeps n over [5, 7] and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19414
// m = 2, cn_stride(7): sweeps n over [5, 7] and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cur_n).k(cur_k)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19432
// Sweeps n over [5, 7], k over 1, 10, 19, 28, 37 and m over [1, 2] (innermost);
// each configuration runs with iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cur_n = 5; cur_n < 8; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19452
// n = 8 and 12 (multiples of nr = 4) with m = 2, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19469
// Same sweep as n_div_4 (n = 8, 12; k in {1, 10, 19, 28, 37}; m = 2), but with
// cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19487
// n = 8 and 12 combined with every m in [1, 2] and k in {1, 10, 19, 28, 37};
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19507
// Full 2x4 tile with ks = 3, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19523
// ks = 3 sub-tile sweep: k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 2],
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19544
// n = 5..7 with ks = 3 and m = 2, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19562
// n = 8 and 12 with ks = 3 and m = 2, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19580
// Sub-tile sweep with cm_stride = 7: k in {1, 10, 19, 28, 37}, n in [1, 4],
// m in [1, 2], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19601
// Full 2x4 tile with ks = 3 and a_offset = 83, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(depth);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19618
// Full 2x4 tile with ks = 3 and a_offset = 83; zero_index takes each value in
// [0, 2) for every k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(4).k(depth);
      tester.ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19638
// Full 2x4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19652
// Full 2x4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19666
// Full 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19680 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19681
19682
19683 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile at k = 8 on the 3x4c2 XOP LD64 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19696
// Full 3x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19710
// k = 8 with every n in [1, 4] and m in [1, 3]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19728
// k = 8, n = 4, with m swept over [1, 3]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(rows).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19744
// k = 8, m = 3, with n swept over [1, 4]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19760
// Full 3x4 tile for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19775
// Sub-tile sweep for k in [1, 7]: every n in [1, 4] and m in [1, 3], one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19795
// Full 3x4 tile for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19810
// Sub-tile sweep for k in [9, 15]: every n in [1, 4] and m in [1, 3], one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19830
// Full 3x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19845
// Sub-tile sweep for k = 16, 24, ..., 80: every n in [1, 4] and m in [1, 3],
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19865
// n = 5..7 (above nr = 4, below 8) with m = 3, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19882
// n = 5..7 with m = 3 and cn_stride = 7, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19900
// n = 5..7 combined with every m in [1, 3] and k in {1, 10, 19, 28, 37};
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19920
// n = 8 and 12 (multiples of nr = 4) with m = 3, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19937
// n = 8 and 12 with m = 3 and cn_stride = 7, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19955
// n = 8 and 12 combined with every m in [1, 3] and k in {1, 10, 19, 28, 37};
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19975
// Full 3x4 tile with ks = 3, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19991
// ks = 3 sub-tile sweep: k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 3],
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20012
// n = 5..7 with ks = 3 and m = 3, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20030
// n = 8 and 12 with ks = 3 and m = 3, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20048
// Sub-tile sweep with cm_stride = 7: k in {1, 10, 19, 28, 37}, n in [1, 4],
// m in [1, 3], one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20069
// Full 3x4 tile with ks = 3 and a_offset = 127, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(depth);
    tester.ks(3).a_offset(127);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20086
// Full 3x4 tile with ks = 3 and a_offset = 127; zero_index takes each value in
// [0, 3) for every k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(4).k(depth);
      tester.ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20106
// Full 3x4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20120
// Full 3x4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20134
// Full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20148 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20149
20150
20151 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 4x4 tile at k = 8 on the 4x4c2 XOP LD64 kernel.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20164
// Full 4x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20178
// k = 8 with every n in [1, 4] and m in [1, 4]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20196
// k = 8, n = 4, with m swept over [1, 4]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(rows).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20212
// k = 8, m = 4, with n swept over [1, 4]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20228
// Full 4x4 tile for every k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20243
// Sub-tile sweep for k in [1, 7]: every n in [1, 4] and m in [1, 4], one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20263
// Full 4x4 tile for every k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20278
// Sub-tile sweep for k in [9, 15]: every n in [1, 4] and m in [1, 4], one
// iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20298
// Full 4x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(depth);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20313
// Sub-tile sweep for k = 16, 24, ..., 80: every n in [1, 4] and m in [1, 4],
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20333
// n = 5..7 (above nr = 4, below 8) with m = 4, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20350
// n = 5..7 with m = 4 and cn_stride = 7, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20368
// n = 5..7 combined with every m in [1, 4] and k in {1, 10, 19, 28, 37};
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20388
// n = 8 and 12 (multiples of nr = 4) with m = 4, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20405
// n = 8 and 12 with m = 4 and cn_stride = 7, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20423
// n = 8 and 12 combined with every m in [1, 4] and k in {1, 10, 19, 28, 37};
// each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20443
// Full 4x4 tile with ks = 3, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20459
// ks = 3 sub-tile sweep: k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 4],
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20480
// n = 5..7 with ks = 3 and m = 4, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20498
// n = 8 and 12 with ks = 3 and m = 4, for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20516
// 4x4c2 XOP LD64 kernel with cm_stride(7): every m in [1, 4] and n in [1, 4]
// for k in {1, 10, 19, 28, 37}; a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20537
// 4x4c2 XOP LD64 kernel, full 4x4 tile with ks(3) and a_offset(163);
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(4).k(k_val).ks(3);
    tester.a_offset(163);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20554
// 4x4c2 XOP LD64 kernel, full 4x4 tile with ks(3) and a_offset(163): for each
// k in {1, 10, 19, 28, 37}, zero_index takes every value in [0, 3].
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(4).n(4).k(k_val).ks(3);
      tester.a_offset(163).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20574
// 4x4c2 XOP LD64 kernel, full 4x4 tile, k = 8, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20588
// 4x4c2 XOP LD64 kernel, full 4x4 tile, k = 8, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20602
// 4x4c2 XOP LD64 kernel, full 4x4 tile, k = 8, with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20616 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20617
20618
20619 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c2 SSE4.1 LD128 kernel, full 1x4 tile, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20632
// 1x4c2 SSE4.1 LD128 kernel, full 1x4 tile, k = 8, with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
20646
// 1x4c2 SSE4.1 LD128 kernel, k = 8: n in [1, 4]; the m loop covers only m = 1.
// A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(m_val).n(n_val).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20664
// 1x4c2 SSE4.1 LD128 kernel, n = 4, k = 8: the m loop covers only m = 1.
// A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(m_val).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20680
// 1x4c2 SSE4.1 LD128 kernel, m = 1, k = 8: n in [1, 4].
// A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(n_val).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20696
// 1x4c2 SSE4.1 LD128 kernel, full 1x4 tile: k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20711
// 1x4c2 SSE4.1 LD128 kernel: k in [1, 7], n in [1, 4]; the m loop covers only
// m = 1. A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20731
// 1x4c2 SSE4.1 LD128 kernel, full 1x4 tile: k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20746
// 1x4c2 SSE4.1 LD128 kernel: k in [9, 15], n in [1, 4]; the m loop covers only
// m = 1. A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20766
// 1x4c2 SSE4.1 LD128 kernel, full 1x4 tile: k in {16, 24, ..., 80}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20781
// 1x4c2 SSE4.1 LD128 kernel: k in {16, 24, ..., 80}, n in [1, 4]; the m loop
// covers only m = 1. A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20801
// 1x4c2 SSE4.1 LD128 kernel, m = 1: n in {5, 6, 7}, each swept over
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20818
// 1x4c2 SSE4.1 LD128 kernel, m = 1, cn_stride(7): n in {5, 6, 7}, each swept
// over k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20836
// 1x4c2 SSE4.1 LD128 kernel: n in {5, 6, 7}, k in {1, 10, 19, 28, 37}; the m
// loop covers only m = 1. A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20856
// 1x4c2 SSE4.1 LD128 kernel, m = 1: n in {8, 12}, each swept over
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20873
// 1x4c2 SSE4.1 LD128 kernel, m = 1, cn_stride(7): n in {8, 12}, each swept
// over k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20891
// 1x4c2 SSE4.1 LD128 kernel: n in {8, 12}, k in {1, 10, 19, 28, 37}; the m
// loop covers only m = 1. A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20911
// 1x4c2 SSE4.1 LD128 kernel, full 1x4 tile with ks(3);
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
20927
// 1x4c2 SSE4.1 LD128 kernel with ks(3): k in {1, 10, 19, 28, 37}, n in [1, 4];
// the m loop covers only m = 1. A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val).ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
20948
// 1x4c2 SSE4.1 LD128 kernel with ks(3) and m = 1: n in {5, 6, 7}, each swept
// over k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20966
// 1x4c2 SSE4.1 LD128 kernel with ks(3) and m = 1: n in {8, 12}, each swept
// over k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
20984
// 1x4c2 SSE4.1 LD128 kernel with cm_stride(7): k in {1, 10, 19, 28, 37},
// n in [1, 4]; the m loop covers only m = 1. A single iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21005
// 1x4c2 SSE4.1 LD128 kernel, full 1x4 tile with ks(3) and a_offset(43);
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val).ks(3);
    tester.a_offset(43);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21022
// 1x4c2 SSE4.1 LD128 kernel, full 1x4 tile with ks(3) and a_offset(43): for
// each k in {1, 10, 19, 28, 37}, the zero_index loop covers only index 0.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(4).k(k_val).ks(3);
      tester.a_offset(43).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21042
// 1x4c2 SSE4.1 LD128 kernel, full 1x4 tile, k = 8, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
21056
// 1x4c2 SSE4.1 LD128 kernel, full 1x4 tile, k = 8, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
21070
// 1x4c2 SSE4.1 LD128 kernel, full 1x4 tile, k = 8, with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
21084 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21085
21086
21087 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c2 AVX LD128 kernel, full 1x4 tile, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
21100
// 1x4c2 AVX LD128 kernel, full 1x4 tile, k = 8, with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
21114
// 1x4c2 AVX LD128 kernel, k = 8: n in [1, 4]; the m loop covers only m = 1.
// A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(m_val).n(n_val).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21132
// 1x4c2 AVX LD128 kernel, n = 4, k = 8: the m loop covers only m = 1.
// A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(m_val).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21148
// 1x4c2 AVX LD128 kernel, m = 1, k = 8: n in [1, 4].
// A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(n_val).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21164
// 1x4c2 AVX LD128 kernel, full 1x4 tile: k in [1, 7].
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21179
// 1x4c2 AVX LD128 kernel: k in [1, 7], n in [1, 4]; the m loop covers only
// m = 1. A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21199
// 1x4c2 AVX LD128 kernel, full 1x4 tile: k in [9, 15].
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21214
// 1x4c2 AVX LD128 kernel: k in [9, 15], n in [1, 4]; the m loop covers only
// m = 1. A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21234
// 1x4c2 AVX LD128 kernel, full 1x4 tile: k in {16, 24, ..., 80}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21249
// 1x4c2 AVX LD128 kernel: k in {16, 24, ..., 80}, n in [1, 4]; the m loop
// covers only m = 1. A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21269
// 1x4c2 AVX LD128 kernel, m = 1: n in {5, 6, 7}, each swept over
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21286
// 1x4c2 AVX LD128 kernel, m = 1, cn_stride(7): n in {5, 6, 7}, each swept
// over k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21304
// 1x4c2 AVX LD128 kernel: n in {5, 6, 7}, k in {1, 10, 19, 28, 37}; the m
// loop covers only m = 1. A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21324
// 1x4c2 AVX LD128 kernel, m = 1: n in {8, 12}, each swept over
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21341
// 1x4c2 AVX LD128 kernel, m = 1, cn_stride(7): n in {8, 12}, each swept over
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21359
// 1x4c2 AVX LD128 kernel: n in {8, 12}, k in {1, 10, 19, 28, 37}; the m loop
// covers only m = 1. A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21379
// 1x4c2 AVX LD128 kernel, full 1x4 tile with ks(3); k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21395
// 1x4c2 AVX LD128 kernel with ks(3): k in {1, 10, 19, 28, 37}, n in [1, 4];
// the m loop covers only m = 1. A single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val).ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21416
// 1x4c2 AVX LD128 kernel with ks(3) and m = 1: n in {5, 6, 7}, each swept
// over k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21434
// 1x4c2 AVX LD128 kernel with ks(3) and m = 1: n in {8, 12}, each swept over
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21452
// 1x4c2 AVX LD128 kernel with cm_stride(7): k in {1, 10, 19, 28, 37},
// n in [1, 4]; the m loop covers only m = 1. A single iteration per
// configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
21473
// 1x4c2 AVX LD128 kernel, full 1x4 tile with ks(3) and a_offset(43);
// k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val).ks(3);
    tester.a_offset(43);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
21490
// 1x4c2 AVX LD128 kernel, full 1x4 tile with ks(3) and a_offset(43): for each
// k in {1, 10, 19, 28, 37}, the zero_index loop covers only index 0.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(4).k(k_val).ks(3);
      tester.a_offset(43).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
21510
// 1x4c2 AVX LD128 kernel, full 1x4 tile, k = 8, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
21524
// 1x4c2 AVX LD128 kernel, full 1x4 tile, k = 8, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
21538
// 1x4c2 AVX LD128 kernel, full 1x4 tile, k = 8, with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
21552 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21553
21554
21555 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 2x4c2 XOP LD128 kernel, full 2x4 tile, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
21568
// Runs the full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21582
// Sweeps n over [1, 4] and m over [1, 2] at k = 8, with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21600
// Sweeps m over [1, 2] with n = 4 and k = 8, with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21616
// Sweeps n over [1, 4] with m = 2 and k = 8, with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21632
// Sweeps k over [1, 7] on the full 2x4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21647
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 2], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21667
// Sweeps k over [9, 15] on the full 2x4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21682
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 2], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21702
// Sweeps k over 16, 24, ..., 80 on the full 2x4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21717
// Sweeps k over 16, 24, ..., 80, n over [1, 4] and m over [1, 2], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21737
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21754
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 with m = 2 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21772
// Sweeps n over [5, 7], k over 1, 10, ..., 37 and m over [1, 2], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21792
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21809
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m = 2 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21827
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over [1, 2], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21847
// Sweeps k over 1, 10, ..., 37 on the full 2x4 tile with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21863
// Sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 2] with ks set to 3 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21884
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 with m = 2 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21902
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m = 2 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21920
// Sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 2] with cm_stride set to 7 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21941
// Sweeps k over 1, 10, ..., 37 on the full 2x4 tile with ks set to 3 and a_offset set to 83.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21958
// Sweeps k over 1, 10, ..., 37 and zero_index over [0, 1] with ks set to 3 and a_offset set to 83.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(4).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21978
// Runs the full 2x4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21992
// Runs the full 2x4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22006
// Runs the full 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
22020 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22021
22022
22023 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the full 1x4 tile (m = 1, n = 4) at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22036
// Runs the full 1x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22050
// Sweeps n over [1, 4] and m over [1, 1] at k = 8, with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22068
// Sweeps m over [1, 1] with n = 4 and k = 8, with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22084
// Sweeps n over [1, 4] with m = 1 and k = 8, with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22100
// Sweeps k over [1, 7] on the full 1x4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22115
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 1], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22135
// Sweeps k over [9, 15] on the full 1x4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22150
// Sweeps k over [9, 15], n over [1, 4] and m over [1, 1], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22170
// Sweeps k over 16, 24, ..., 80 on the full 1x4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22185
// Sweeps k over 16, 24, ..., 80, n over [1, 4] and m over [1, 1], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22205
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22222
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 with m = 1 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22240
// Sweeps n over [5, 7], k over 1, 10, ..., 37 and m over [1, 1], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22260
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22277
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m = 1 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22295
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over [1, 1], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22315
// Sweeps k over 1, 10, ..., 37 on the full 1x4 tile with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_size)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22331
// Sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 1] with ks set to 3 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22352
// Sweeps n over [5, 7] and k over 1, 10, ..., 37 with m = 1 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22370
// Sweeps n over 8 and 12 and k over 1, 10, ..., 37 with m = 1 and ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22388
// Sweeps k over 1, 10, ..., 37, n over [1, 4] and m over [1, 1] with cm_stride set to 7 and iterations(1).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22409
// Sweeps k over 1, 10, ..., 37 on the full 1x4 tile with ks set to 3 and a_offset set to 43.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22426
// Sweeps k over 1, 10, ..., 37 and zero_index over [0, 0] with ks set to 3 and a_offset set to 43.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(4).k(k_size)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22446
// Runs the full 1x4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22460
// Runs the full 1x4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22474
// Runs the full 1x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22488 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22489
22490
22491 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the full 2x4 tile (m = 2, n = 4) at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22504
// Runs the full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
22518
// Sweeps n over [1, 4] and m over [1, 2] at k = 8, with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
22536
// Sweeps m over [1, 2] with n = 4 and k = 8, with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22552
// Sweeps n over [1, 4] with m = 2 and k = 8, with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22568
// Sweeps k over [1, 7] on the full 2x4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22583
// Sweeps k over [1, 7], n over [1, 4] and m over [1, 2], with iterations(1) per shape.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
22603
// Sweeps k over [9, 15] on the full 2x4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
22618
// Sweeps k over 9..15, n over 1..4 and m over 1..2; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
22638
// Sweeps k over 16..80 in steps of 8 with m = 2 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
22653
// Sweeps k over 16..80 (step 8), n over 1..4 and m over 1..2; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
22673
// Sweeps n over 5..7 and k over 1..40 (step 9) with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
22690
// Sweeps n over 5..7 and k over 1..40 (step 9) with m = 2 and cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
22708
// Sweeps n over 5..7, k over 1..40 (step 9) and m over 1..2; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
22728
// Sweeps n over {8, 12} and k over 1..40 (step 9) with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
22745
// Sweeps n over {8, 12} and k over 1..40 (step 9) with m = 2 and cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
22763
// Sweeps n over {8, 12}, k over 1..40 (step 9) and m over 1..2; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
22783
// Sweeps k over 1..40 (step 9) with m = 2, n = 4 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
22799
// Sweeps k over 1..40 (step 9), n over 1..4 and m over 1..2 with ks = 3; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
22820
// Sweeps n over 5..7 and k over 1..40 (step 9) with m = 2 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
22838
// Sweeps n over {8, 12} and k over 1..40 (step 9) with m = 2 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
22856
// Sweeps k over 1..40 (step 9), n over 1..4 and m over 1..2 with cm_stride = 7; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
22877
// Sweeps k over 1..40 (step 9) with m = 2, n = 4, ks = 3 and a_offset = 83.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
22894
// Sweeps k over 1..40 (step 9) and zero_index over 0..1 with ks = 3 and a_offset = 83.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(4).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zi)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
22914
// Single run at m = 2, n = 4, k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
22928
// Single run at m = 2, n = 4, k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
22942
// Single run at m = 2, n = 4, k = 8 with cm_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
22956 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22957
22958
22959 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 3, n = 4, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
22972
// Single run at m = 3, n = 4, k = 8 with cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
22986
// Sweeps n over 1..4 and m over 1..3 with k = 8; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23004
// Sweeps m over 1..3 with n = 4 and k = 8; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23020
// Sweeps n over 1..4 with m = 3 and k = 8; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23036
// Sweeps k over 1..7 with m = 3 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23051
// Sweeps k over 1..7, n over 1..4 and m over 1..3; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23071
// Sweeps k over 9..15 with m = 3 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23086
// Sweeps k over 9..15, n over 1..4 and m over 1..3; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23106
// Sweeps k over 16..80 in steps of 8 with m = 3 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23121
// Sweeps k over 16..80 (step 8), n over 1..4 and m over 1..3; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23141
// Sweeps n over 5..7 and k over 1..40 (step 9) with m = 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23158
// Sweeps n over 5..7 and k over 1..40 (step 9) with m = 3 and cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23176
// Sweeps n over 5..7, k over 1..40 (step 9) and m over 1..3; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23196
// Sweeps n over {8, 12} and k over 1..40 (step 9) with m = 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23213
// Sweeps n over {8, 12} and k over 1..40 (step 9) with m = 3 and cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23231
// Sweeps n over {8, 12}, k over 1..40 (step 9) and m over 1..3; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23251
// Sweeps k over 1..40 (step 9) with m = 3, n = 4 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23267
// Sweeps k over 1..40 (step 9), n over 1..4 and m over 1..3 with ks = 3; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23288
// Sweeps n over 5..7 and k over 1..40 (step 9) with m = 3 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23306
// Sweeps n over {8, 12} and k over 1..40 (step 9) with m = 3 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23324
// Sweeps k over 1..40 (step 9), n over 1..4 and m over 1..3 with cm_stride = 7; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23345
// Sweeps k over 1..40 (step 9) with m = 3, n = 4, ks = 3 and a_offset = 127.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23362
// Sweeps k over 1..40 (step 9) and zero_index over 0..2 with ks = 3 and a_offset = 127.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(4).k(kc)
          .ks(3)
          .a_offset(127)
          .zero_index(zi)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23382
// Single run at m = 3, n = 4, k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23396
// Single run at m = 3, n = 4, k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23410
// Single run at m = 3, n = 4, k = 8 with cm_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23424 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23425
23426
23427 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m = 1, n = 4, k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23440
// Single run at m = 1, n = 4, k = 8 with cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23454
// Sweeps n over 1..4 and m over 1..1 with k = 8; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23472
// Sweeps m over 1..1 with n = 4 and k = 8; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23488
// Sweeps n over 1..4 with m = 1 and k = 8; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23504
// Sweeps k over 1..7 with m = 1 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23519
// Sweeps k over 1..7, n over 1..4 and m over 1..1; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23539
// Sweeps k over 9..15 with m = 1 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23554
// Sweeps k over 9..15, n over 1..4 and m over 1..1; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23574
// Sweeps k over 16..80 in steps of 8 with m = 1 and n = 4.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23589
// Sweeps k over 16..80 (step 8), n over 1..4 and m over 1..1; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23609
// Sweeps n over 5..7 and k over 1..40 (step 9) with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23626
// Sweeps n over 5..7 and k over 1..40 (step 9) with m = 1 and cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23644
// Sweeps n over 5..7, k over 1..40 (step 9) and m over 1..1; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23664
// Sweeps n over {8, 12} and k over 1..40 (step 9) with m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(nc).k(kc)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23681
// n_div_4_strided_cn: n in {8, 12}, k in 1, 10, ..., 37, with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23699
// n_div_4_subtile: n in {8, 12}, k in 1, 10, ..., 37, m in 1..1; iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23719
// small_kernel: full 1x4 tile with ks set to 3, k takes 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23735
// small_kernel_subtile: ks set to 3; k in 1, 10, ..., 37, n in 1..4, m in 1..1; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23756
// n_gt_4_small_kernel: n in 5..7, k in 1, 10, ..., 37, with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23774
// n_div_4_small_kernel: n in {8, 12}, k in 1, 10, ..., 37, with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23792
// strided_cm_subtile: cm_stride set to 7; k in 1, 10, ..., 37, n in 1..4, m in 1..1; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
23813
// a_offset: full 1x4 tile with ks set to 3 and a_offset set to 43; k takes 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23830
// zero: as a_offset (ks 3, a_offset 43), additionally passing zero_index 0 for each k in 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(4).k(k_size)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23850
// qmin: full 1x4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23864
// qmax: full 1x4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23878
// strided_cm: full 1x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23892 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23893
23894
23895 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: full 2x4 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23908
// strided_cn: full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
23922
// k_eq_8_subtile: k = 8 for every (n, m) with n in 1..4 and m in 1..2; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
23940
// k_eq_8_subtile_m: k = 8, n = 4, m in 1..2; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23956
// k_eq_8_subtile_n: k = 8, m = 2, n in 1..4; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23972
// k_lt_8: full 2x4 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
23987
// k_lt_8_subtile: k in 1..7, n in 1..4, m in 1..2; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24007
// k_gt_8: full 2x4 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24022
// k_gt_8_subtile: k in 9..15, n in 1..4, m in 1..2; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24042
// k_div_8: full 2x4 tile for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24057
// k_div_8_subtile: k = 16, 24, ..., 80, n in 1..4, m in 1..2; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24077
// n_gt_4: n takes 5, 6, 7 and k takes 1, 10, 19, 28, 37, with m fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24094
// n_gt_4_strided_cn: n in 5..7, k in 1, 10, ..., 37, with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24112
// n_gt_4_subtile: n in 5..7, k in 1, 10, ..., 37, m in 1..2; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24132
// n_div_4: n takes 8 and 12, k takes 1, 10, 19, 28, 37, with m fixed at 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24149
// n_div_4_strided_cn: n in {8, 12}, k in 1, 10, ..., 37, with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24167
// n_div_4_subtile: n in {8, 12}, k in 1, 10, ..., 37, m in 1..2; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24187
// small_kernel: full 2x4 tile with ks set to 3, k takes 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24203
// small_kernel_subtile: ks set to 3; k in 1, 10, ..., 37, n in 1..4, m in 1..2; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24224
// n_gt_4_small_kernel: n in 5..7, k in 1, 10, ..., 37, with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24242
// n_div_4_small_kernel: n in {8, 12}, k in 1, 10, ..., 37, with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24260
// strided_cm_subtile: cm_stride set to 7; k in 1, 10, ..., 37, n in 1..4, m in 1..2; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24281
// a_offset: full 2x4 tile with ks set to 3 and a_offset set to 83; k takes 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24298
// zero: as a_offset (ks 3, a_offset 83), additionally passing zero_index 0 and 1 for each k in 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(4).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24318
// qmin: full 2x4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24332
// qmax: full 2x4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24346
// strided_cm: full 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24360 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24361
24362
24363 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: full 4x4 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24376
// strided_cn: full 4x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
24390
// k_eq_8_subtile: k = 8 for every (n, m) with n in 1..4 and m in 1..4; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24408
// k_eq_8_subtile_m: k = 8, n = 4, m in 1..4; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24424
// k_eq_8_subtile_n: k = 8, m = 4, n in 1..4; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24440
// k_lt_8: full 4x4 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24455
// k_lt_8_subtile: k in 1..7, n in 1..4, m in 1..4; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24475
// k_gt_8: full 4x4 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24490
// k_gt_8_subtile: k in 9..15, n in 1..4, m in 1..4; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24510
// k_div_8: full 4x4 tile for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24525
// k_div_8_subtile: k = 16, 24, ..., 80, n in 1..4, m in 1..4; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24545
// n_gt_4: n takes 5, 6, 7 and k takes 1, 10, 19, 28, 37, with m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24562
// n_gt_4_strided_cn: n in 5..7, k in 1, 10, ..., 37, with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24580
// n_gt_4_subtile: n in 5..7, k in 1, 10, ..., 37, m in 1..4; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24600
// n_div_4: n takes 8 and 12, k takes 1, 10, 19, 28, 37, with m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24617
// n_div_4_strided_cn: n in {8, 12}, k in 1, 10, ..., 37, with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24635
// n_div_4_subtile: n in {8, 12}, k in 1, 10, ..., 37, m in 1..4; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24655
// small_kernel: full 4x4 tile with ks set to 3, k takes 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
24671
// small_kernel_subtile: ks set to 3; k in 1, 10, ..., 37, n in 1..4, m in 1..4; iterations(1) each.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                  xnn_init_qc8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
24692
// n_gt_4_small_kernel: n in 5..7, k in 1, 10, ..., 37, with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24710
// n_div_4_small_kernel: n in {8, 12}, k in 1, 10, ..., 37, with ks set to 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
24728
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  // cm_stride(7) with k = 1, 10, 19, 28, 37 and every (n, m) pair in 1..4 x 1..4;
  // one iteration per configuration.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24749
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  // Full 4x4 tile, ks(3) and a_offset(163), with k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24766
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  // Same setup as the a_offset case (ks(3), a_offset(163)), additionally
  // passing each zero_index in 0..3 for every k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24786
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  // Single configuration (m=4, n=4, k=8) with qmin set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24800
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  // Single configuration (m=4, n=4, k=8) with qmax set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24814
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  // Single configuration (m=4, n=4, k=8) with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld64,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24828 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24829
24830
24831 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single configuration: m=4, n=4, k=8.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24844
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // Single configuration (m=4, n=4, k=8) with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24858
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k fixed at 8; every (n, m) pair in 1..4 x 1..4, one iteration each.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24876
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  // k fixed at 8 and n at 4; m runs over 1..4, one iteration each.
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24892
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  // k fixed at 8 and m at 4; n runs over 1..4, one iteration each.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24908
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile with k = 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24923
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 1..7 crossed with every (n, m) pair in 1..4 x 1..4; one iteration each.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24943
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile with k = 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24958
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 9..15 crossed with every (n, m) pair in 1..4 x 1..4; one iteration each.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24978
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile with k = 16, 24, ..., 80 (multiples of 8).
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24993
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // k = 16, 24, ..., 80 crossed with every (n, m) pair in 1..4 x 1..4;
  // one iteration each.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25013
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  // n = 5, 6, 7 (above nr = 4) with k = 1, 10, 19, 28, 37; m stays at 4.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25030
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // cn_stride(7) with n = 5, 6, 7 and k = 1, 10, 19, 28, 37; m stays at 4.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25048
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37 and m = 1..4;
  // one iteration each.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25068
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8, 12 (multiples of nr = 4) with k = 1, 10, 19, 28, 37; m stays at 4.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25085
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  // cn_stride(7) with n = 8, 12 and k = 1, 10, 19, 28, 37; m stays at 4.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25103
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // n = 8, 12 crossed with k = 1, 10, 19, 28, 37 and m = 1..4;
  // one iteration each.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25123
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile and ks(3), with k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
25139
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // ks(3) with k = 1, 10, 19, 28, 37 and every (n, m) pair in 1..4 x 1..4;
  // one iteration per configuration.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25160
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // ks(3) with n = 5, 6, 7 (above nr = 4) and k = 1, 10, 19, 28, 37; m stays at 4.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25178
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  // ks(3) with n = 8, 12 (multiples of nr = 4) and k = 1, 10, 19, 28, 37; m stays at 4.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25196
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  // cm_stride(7) with k = 1, 10, 19, 28, 37 and every (n, m) pair in 1..4 x 1..4;
  // one iteration per configuration.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25217
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  // Full 4x4 tile, ks(3) and a_offset(163), with k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
25234
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  // Same setup as the a_offset case (ks(3), a_offset(163)), additionally
  // passing each zero_index in 0..3 for every k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25254
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  // Single configuration (m=4, n=4, k=8) with qmin set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
25268
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  // Single configuration (m=4, n=4, k=8) with qmax set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
25282
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  // Single configuration (m=4, n=4, k=8) with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
25296 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25297
25298
25299 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  // Single configuration: m=3, n=4, k=8.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25312
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // Single configuration (m=3, n=4, k=8) with cn_stride set to 7.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25326
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k fixed at 8; every (n, m) pair in 1..4 x 1..3, one iteration each.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25344
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  // k fixed at 8 and n at 4; m runs over 1..3, one iteration each.
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25360
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  // k fixed at 8 and m at 3; n runs over 1..4, one iteration each.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25376
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with k = 1..7.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25391
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k = 1..7 crossed with every (n, m) pair in 1..4 x 1..3; one iteration each.
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25411
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with k = 9..15.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25426
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k = 9..15 crossed with every (n, m) pair in 1..4 x 1..3; one iteration each.
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25446
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with k = 16, 24, ..., 80 (multiples of 8).
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25461
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // k = 16, 24, ..., 80 crossed with every (n, m) pair in 1..4 x 1..3;
  // one iteration each.
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25481
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  // n = 5, 6, 7 (above nr = 4) with k = 1, 10, 19, 28, 37; m stays at 3.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25498
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // cn_stride(7) with n = 5, 6, 7 and k = 1, 10, 19, 28, 37; m stays at 3.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25516
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37 and m = 1..3;
  // one iteration each.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25536
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  // n = 8, 12 (multiples of nr = 4) with k = 1, 10, 19, 28, 37; m stays at 3.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25553
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // cn_stride(7) with n = 8, 12 and k = 1, 10, 19, 28, 37; m stays at 3.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25571
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n = 8, 12 crossed with k = 1, 10, 19, 28, 37 and m = 1..3;
  // one iteration each.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25591
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile and ks(3), with k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25607
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  // ks(3) with k = 1, 10, 19, 28, 37 and every (n, m) pair in 1..4 x 1..3;
  // one iteration per configuration.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25628
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // ks(3) with n = 5, 6, 7 (above nr = 4) and k = 1, 10, 19, 28, 37; m stays at 3.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25646
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // ks(3) with n = 8, 12 (multiples of nr = 4) and k = 1, 10, 19, 28, 37; m stays at 3.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25664
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  // cm_stride(7) with k = 1, 10, 19, 28, 37 and every (n, m) pair in 1..4 x 1..3;
  // one iteration per configuration.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25685
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile, ks(3) and a_offset(127), with k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25702
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  // Same setup as the a_offset case (ks(3), a_offset(127)), additionally
  // passing each zero_index in 0..2 for every k = 1, 10, 19, 28, 37.
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25722
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  // Single configuration (m=3, n=4, k=8) with qmin set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25736
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  // Single configuration (m=3, n=4, k=8) with qmax set to 128.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25750
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  // Single configuration (m=3, n=4, k=8) with cm_stride set to 7.
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25764 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25765
25766
25767 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  // Single configuration: m=4, n=4, k=8.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
25780
// Single run at m=4, n=4, k=8 with cn_stride(7) applied to the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
25794
// k fixed at 8; sweeps n over 1..4 (outer) and m over 1..4 (inner),
// running a single iteration for each (n, m) pair.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    for (uint32_t m_size = 1; m_size < 5; ++m_size) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25812
// k=8 and n=4 fixed; sweeps m over 1..4 with a single iteration per size.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size < 5; ++m_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25828
// k=8 and m=4 fixed; sweeps n over 1..4 with a single iteration per size.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25844
// m=4, n=4 fixed; sweeps k over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25859
// Sweeps k over 1..7 and, for each k, n over 1..4 and m over 1..4,
// running a single iteration for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25879
// m=4, n=4 fixed; sweeps k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25894
// Sweeps k over 9..15 and, for each k, n over 1..4 and m over 1..4,
// running a single iteration for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25914
// m=4, n=4 fixed; sweeps k over the multiples of 8 from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size < 81; k_size += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
25929
// Sweeps k over the multiples of 8 from 16 through 80 and, for each k,
// n over 1..4 and m over 1..4, with a single iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size < 81; k_size += 8) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
25949
// m=4 fixed; sweeps n over 5..7 and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25966
// m=4 fixed with cn_stride(7); sweeps n over 5..7 and, for each n,
// k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
25984
// Sweeps n over 5..7, k over 1, 10, 19, 28, 37, and m over 1..4 (innermost),
// running a single iteration for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26004
// m=4 fixed; runs n = 8 and n = 12 and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26021
// m=4 fixed with cn_stride(7); runs n = 8 and n = 12 and, for each n,
// k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26039
// Runs n = 8 and n = 12, k over 1, 10, 19, 28, 37, and m over 1..4 (innermost),
// with a single iteration for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26059
// m=4, n=4 with ks(3); sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26075
// ks(3) with k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..4;
// a single iteration for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26096
// m=4 with ks(3); sweeps n over 5..7 and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26114
// m=4 with ks(3); runs n = 8 and n = 12 and, for each n,
// k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26132
// cm_stride(7) with k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..4;
// a single iteration for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 5; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26153
// m=4, n=4 with ks(3) and a_offset(163); sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26170
// m=4, n=4 with ks(3) and a_offset(163); for each k in 1, 10, 19, 28, 37
// the zero_index setting is swept over 0..3.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    for (uint32_t mz_index = 0; mz_index <= 3; ++mz_index) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(k_size)
          .ks(3)
          .a_offset(163)
          .zero_index(mz_index)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26190
// Single run at m=4, n=4, k=8 with qmin(128) applied to the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26204
// Single run at m=4, n=4, k=8 with qmax(128) applied to the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26218
// Single run at m=4, n=4, k=8 with cm_stride(7) applied to the tester.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26232 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26233
26234
26235 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline: one run at m=2, n=4, k=8 with no optional tester settings.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26248
// Single run at m=2, n=4, k=8 with cn_stride(7) applied to the tester.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26262
// k fixed at 8; sweeps n over 1..4 (outer) and m over 1..2 (inner),
// running a single iteration for each (n, m) pair.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    for (uint32_t m_size = 1; m_size < 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26280
// k=8 and n=4 fixed; sweeps m over 1..2 with a single iteration per size.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size < 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26296
// k=8 and m=2 fixed; sweeps n over 1..4 with a single iteration per size.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26312
// m=2, n=4 fixed; sweeps k over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26327
// Sweeps k over 1..7 and, for each k, n over 1..4 and m over 1..2,
// running a single iteration for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26347
// m=2, n=4 fixed; sweeps k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26362
// Sweeps k over 9..15 and, for each k, n over 1..4 and m over 1..2,
// running a single iteration for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26382
// m=2, n=4 fixed; sweeps k over the multiples of 8 from 16 through 80.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size < 81; k_size += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26397
// Sweeps k over the multiples of 8 from 16 through 80 and, for each k,
// n over 1..4 and m over 1..2, with a single iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size < 81; k_size += 8) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26417
// m=2 fixed; sweeps n over 5..7 and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26434
// m=2 fixed with cn_stride(7); sweeps n over 5..7 and, for each n,
// k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26452
// Sweeps n over 5..7, k over 1, 10, 19, 28, 37, and m over 1..2 (innermost),
// running a single iteration for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26472
// m=2 fixed; runs n = 8 and n = 12 and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26489
// m=2 fixed with cn_stride(7); runs n = 8 and n = 12 and, for each n,
// k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26507
// Runs n = 8 and n = 12, k over 1, 10, 19, 28, 37, and m over 1..2 (innermost),
// with a single iteration for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26527
// m=2, n=4 with ks(3); sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26543
// ks(3) with k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..2;
// a single iteration for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26564
// m=2 with ks(3); sweeps n over 5..7 and, for each n, k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size <= 7; ++n_size) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26582
// m=2 with ks(3); runs n = 8 and n = 12 and, for each n,
// k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size < 13; n_size += 4) {
    for (size_t k_size = 1; k_size < 41; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26600
// cm_stride(7) with k over 1, 10, 19, 28, 37, n over 1..4 and m over 1..2;
// a single iteration for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26621
// m=2, n=4 with ks(3) and a_offset(83); sweeps k over 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26638
// m=2, n=4 with ks(3) and a_offset(83); for each k in 1, 10, 19, 28, 37
// the zero_index setting is swept over 0..1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 41; k_size += 9) {
    for (uint32_t mz_index = 0; mz_index <= 1; ++mz_index) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(2).n(4).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(mz_index)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26658
// Single run at m=2, n=4, k=8 with qmin(128) applied to the tester.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26672
// Single run at m=2, n=4, k=8 with qmax(128) applied to the tester.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26686
// Single run at m=2, n=4, k=8 with cm_stride(7) applied to the tester.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26700 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26701
26702
26703 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Baseline: one run at m=1, n=4, k=8 with no optional tester settings.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26716
// Single run at m=1, n=4, k=8 with cn_stride(7) applied to the tester.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
26730
// k fixed at 8; sweeps n over 1..4 (outer) while the inner m loop covers only
// m = 1, running a single iteration for each (n, m) pair.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    for (uint32_t m_size = 1; m_size < 2; ++m_size) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(
              xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26748
// k=8 and n=4 fixed; the m loop covers only m = 1, with a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size < 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26764
// k=8 and m=1 fixed; sweeps n over 1..4 with a single iteration per size.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size < 5; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(8)
        .iterations(1)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26780
// m=1, n=4 fixed; sweeps k over 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26795
// Sweeps k over 1..7 and, for each k, n over 1..4; the innermost m loop covers
// only m = 1. A single iteration runs for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 7; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26815
// m=1, n=4 fixed; sweeps k over 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26830
// Sweeps k over 9..15 and, for each k, n over 1..4; the innermost m loop covers
// only m = 1. A single iteration runs for each combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size <= 15; ++k_size) {
    for (uint32_t n_size = 1; n_size < 5; ++n_size) {
      for (uint32_t m_size = 1; m_size < 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26850
// Runs the 1x4c8 SSE4.1 kernel with m=1, n=4 for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
26865
// Runs the 1x4c8 SSE4.1 kernel over k = 16, 24, ..., 80, n in [1, 4],
// m in [1, 1], with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26885
// Runs the 1x4c8 SSE4.1 kernel with m=1 over n in [5, 8) and
// k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26902
// Runs the 1x4c8 SSE4.1 kernel with m=1 and cn_stride(7) over n in [5, 8)
// and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26920
// Runs the 1x4c8 SSE4.1 kernel over n in [5, 8), k = 1, 10, ..., 37,
// m in [1, 1], with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26940
// Runs the 1x4c8 SSE4.1 kernel with m=1 over n = 8, 12 and
// k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26957
// Runs the 1x4c8 SSE4.1 kernel with m=1 and cn_stride(7) over n = 8, 12
// and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
26975
// Runs the 1x4c8 SSE4.1 kernel over n = 8, 12, k = 1, 10, ..., 37,
// m in [1, 1], with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
26995
// Runs the 1x4c8 SSE4.1 kernel with m=1, n=4, ks(3) for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_val)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27011
// Runs the 1x4c8 SSE4.1 kernel with ks(3) over k = 1, 10, ..., 37,
// n in [1, 4], m in [1, 1], with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27032
// Runs the 1x4c8 SSE4.1 kernel with m=1 and ks(3) over n in [5, 8) and
// k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27050
// Runs the 1x4c8 SSE4.1 kernel with m=1 and ks(3) over n = 8, 12 and
// k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27068
// Runs the 1x4c8 SSE4.1 kernel with cm_stride(7) over k = 1, 10, ..., 37,
// n in [1, 4], m in [1, 1], with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27089
// Runs the 1x4c8 SSE4.1 kernel with m=1, n=4, ks(3), a_offset(43) for
// k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_val)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27106
// Runs the 1x4c8 SSE4.1 kernel with ks(3), a_offset(43) for
// k = 1, 10, ..., 37, passing each index in [0, 1) to zero_index().
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_val)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_row)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27126
// Runs the 1x4c8 SSE4.1 kernel once with m=1, n=4, k=8 and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
27140
// Runs the 1x4c8 SSE4.1 kernel once with m=1, n=4, k=8 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
27154
// Runs the 1x4c8 SSE4.1 kernel once with m=1, n=4, k=8 and cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
27168 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27169
27170
27171 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 3x4c8 SSE2 kernel once with m=3, n=4, k=8.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
27184
// Runs the 3x4c8 SSE2 kernel once with m=3, n=4, k=8 and cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
27198
// Runs the 3x4c8 SSE2 kernel with k=8 over n in [1, 4] and m in [1, 3],
// with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27216
// Runs the 3x4c8 SSE2 kernel with n=4, k=8 for each m in [1, 3],
// with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
27232
// Runs the 3x4c8 SSE2 kernel with m=3, k=8 for each n in [1, 4],
// with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(n_val).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
27248
// Runs the 3x4c8 SSE2 kernel with m=3, n=4 for each k from 1 through 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
27263
// Runs the 3x4c8 SSE2 kernel over k in [1, 8), n in [1, 4], m in [1, 3],
// with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27283
// Runs the 3x4c8 SSE2 kernel with m=3, n=4 for each k from 9 through 15.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
27298
// Runs the 3x4c8 SSE2 kernel over k in [9, 16), n in [1, 4], m in [1, 3],
// with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27318
// Runs the 3x4c8 SSE2 kernel with m=3, n=4 for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
27333
// Runs the 3x4c8 SSE2 kernel over k = 16, 24, ..., 80, n in [1, 4],
// m in [1, 3], with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27353
// Runs the 3x4c8 SSE2 kernel with m=3 over n in [5, 8) and
// k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27370
// Runs the 3x4c8 SSE2 kernel with m=3 and cn_stride(7) over n in [5, 8)
// and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27388
// Runs the 3x4c8 SSE2 kernel over n in [5, 8), k = 1, 10, ..., 37,
// m in [1, 3], with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27408
// Runs the 3x4c8 SSE2 kernel with m=3 over n = 8, 12 and
// k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27425
// Runs the 3x4c8 SSE2 kernel with m=3 and cn_stride(7) over n = 8, 12
// and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27443
// Runs the 3x4c8 SSE2 kernel over n = 8, 12, k = 1, 10, ..., 37,
// m in [1, 3], with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27463
// Runs the 3x4c8 SSE2 kernel with m=3, n=4, ks(3) for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_val)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
27479
// Runs the 3x4c8 SSE2 kernel with ks(3) over k = 1, 10, ..., 37,
// n in [1, 4], m in [1, 3], with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27500
// Runs the 3x4c8 SSE2 kernel with m=3 and ks(3) over n in [5, 8) and
// k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27518
// Runs the 3x4c8 SSE2 kernel with m=3 and ks(3) over n = 8, 12 and
// k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27536
// Runs the 3x4c8 SSE2 kernel with cm_stride(7) over k = 1, 10, ..., 37,
// n in [1, 4], m in [1, 3], with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27557
// Runs the 3x4c8 SSE2 kernel with m=3, n=4, ks(3), a_offset(127) for
// k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_val)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
27574
// Runs the 3x4c8 SSE2 kernel with ks(3), a_offset(127) for
// k = 1, 10, ..., 37, passing each index in [0, 3) to zero_index().
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_row = 0; zero_row < 3; ++zero_row) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_val)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_row)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27594
// Runs the 3x4c8 SSE2 kernel once with m=3, n=4, k=8 and qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
27608
// Runs the 3x4c8 SSE2 kernel once with m=3, n=4, k=8 and qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
27622
// Runs the 3x4c8 SSE2 kernel once with m=3, n=4, k=8 and cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
27636 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27637
27638
27639 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 1x4c8 AVX kernel once with m=1, n=4, k=8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
27652
// Runs the 1x4c8 AVX kernel once with m=1, n=4, k=8 and cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
27666
// Runs the 1x4c8 AVX kernel with k=8 over n in [1, 4] and m in [1, 1],
// with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27684
// Runs the 1x4c8 AVX kernel with n=4, k=8 for each m in [1, 1],
// with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27700
// Runs the 1x4c8 AVX kernel with m=1, k=8 for each n in [1, 4],
// with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(n_val).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27716
// Runs the 1x4c8 AVX kernel with m=1, n=4 for each k from 1 through 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27731
// Runs the 1x4c8 AVX kernel over k in [1, 8), n in [1, 4], m in [1, 1],
// with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27751
// Runs the 1x4c8 AVX kernel with m=1, n=4 for each k from 9 through 15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27766
// Runs the 1x4c8 AVX kernel over k in [9, 16), n in [1, 4], m in [1, 1],
// with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27786
// Runs the 1x4c8 AVX kernel with m=1, n=4 for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_val)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27801
// Runs the 1x4c8 AVX kernel over k = 16, 24, ..., 80, n in [1, 4],
// m in [1, 1], with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27821
// Runs the 1x4c8 AVX kernel with m=1 over n in [5, 8) and
// k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27838
// Runs the 1x4c8 AVX kernel with m=1 and cn_stride(7) over n in [5, 8)
// and k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27856
// Runs the 1x4c8 AVX kernel over n in [5, 8), k = 1, 10, ..., 37,
// m in [1, 1], with a single iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27876
// Runs the 1x4c8 AVX kernel with m=1 over n = 8, 12 and
// k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27893
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27911
// Sweeps n in {8, 12}, k in {1, 10, 19, 28, 37} and m = 1, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27931
// Full 1x4 tile with ks(3), for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
27947
// Sweeps k in {1, 10, 19, 28, 37}, n = 1..4 and m = 1 with ks(3), one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
27968
// Sweeps n = 5..7 and k in {1, 10, 19, 28, 37} with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
27986
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28004
// Sweeps k in {1, 10, 19, 28, 37}, n = 1..4 and m = 1 with cm_stride(7), one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28025
// Full 1x4 tile with ks(3) and a_offset(43), for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28042
// Sweeps k in {1, 10, 19, 28, 37} and zero_index = 0 with ks(3) and a_offset(43).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28062
// Full 1x4 tile at k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28076
// Full 1x4 tile at k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28090
// Full 1x4 tile at k = 8 with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28104 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28105
28106
28107 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 1x4 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28120
// Full 1x4 tile at k = 8 with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28134
// Sweeps n = 1..4 and m = 1 at k = 8, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28152
// Sweeps m = 1 with n = 4 and k = 8, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28168
// Sweeps n = 1..4 with m = 1 and k = 8, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28184
// Full 1x4 tile for k = 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28199
// Sweeps k = 1..7, n = 1..4 and m = 1, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28219
// Full 1x4 tile for k = 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28234
// Sweeps k = 9..15, n = 1..4 and m = 1, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28254
// Full 1x4 tile for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28269
// Sweeps k = 16, 24, ..., 80, n = 1..4 and m = 1, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28289
// Sweeps n = 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28306
// Sweeps n = 5..7 and k in {1, 10, 19, 28, 37} with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28324
// Sweeps n = 5..7, k in {1, 10, 19, 28, 37} and m = 1, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28344
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28361
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28379
// Sweeps n in {8, 12}, k in {1, 10, 19, 28, 37} and m = 1, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28399
// Full 1x4 tile with ks(3), for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28415
// Sweeps k in {1, 10, 19, 28, 37}, n = 1..4 and m = 1 with ks(3), one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28436
// Sweeps n = 5..7 and k in {1, 10, 19, 28, 37} with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28454
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} with ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28472
// Sweeps k in {1, 10, 19, 28, 37}, n = 1..4 and m = 1 with cm_stride(7), one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28493
// Full 1x4 tile with ks(3) and a_offset(43), for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28510
// Sweeps k in {1, 10, 19, 28, 37} and zero_index = 0 with ks(3) and a_offset(43).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28530
// Full 1x4 tile at k = 8 with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28544
// Full 1x4 tile at k = 8 with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28558
// Full 1x4 tile at k = 8 with cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28572 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28573
28574
28575 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 2x4 tile at k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28588
// Full 2x4 tile at k = 8 with cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
28602
// Sweeps n = 1..4 and m = 1..2 at k = 8, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28620
// Sweeps m = 1..2 with n = 4 and k = 8, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28636
// Sweeps n = 1..4 with m = 2 and k = 8, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28652
// Full 2x4 tile for k = 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28667
// Sweeps k = 1..7, n = 1..4 and m = 1..2, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28687
// Full 2x4 tile for k = 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28702
// Sweeps k = 9..15, n = 1..4 and m = 1..2, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28722
// Full 2x4 tile for k = 16, 24, ..., 80.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28737
// Sweeps k = 16, 24, ..., 80, n = 1..4 and m = 1..2, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28757
// Sweeps n = 5..7 and k in {1, 10, 19, 28, 37} with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28774
// Sweeps n = 5..7 and k in {1, 10, 19, 28, 37} with m = 2 and cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28792
// Sweeps n = 5..7, k in {1, 10, 19, 28, 37} and m = 1..2, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28812
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28829
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} with m = 2 and cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28847
// Sweeps n in {8, 12}, k in {1, 10, 19, 28, 37} and m = 1..2, one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28867
// Full 2x4 tile with ks(3), for k in {1, 10, 19, 28, 37}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
            xnn_init_qc8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
28883
// Sweeps k in {1, 10, 19, 28, 37}, n = 1..4 and m = 1..2 with ks(3), one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28904
// Sweeps n = 5..7 and k in {1, 10, 19, 28, 37} with m = 2 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28922
// Sweeps n in {8, 12} and k in {1, 10, 19, 28, 37} with m = 2 and ks(3).
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
              xnn_init_qc8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
28940
// Sweeps k in {1, 10, 19, 28, 37}, n = 1..4 and m = 1..2 with cm_stride(7), one iteration per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
                xnn_init_qc8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
28961
// Sweeps k = 1, 10, 19, 28, 37 with m = 2, n = 4, ks = 3 and a_offset = 83.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(cur_k);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
        xnn_init_qc8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
28978
// Sweeps k = 1, 10, 19, 28, 37 and zero_index = 0..1 with m = 2, n = 4,
// ks = 3 and a_offset = 83.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_mz = 0; cur_mz <= 1; ++cur_mz) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(4).k(cur_k);
      tester.ks(3).a_offset(83).zero_index(cur_mz);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
          xnn_init_qc8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
28998
// Single run with m = 2, n = 4, k = 8 and qmin = 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29012
// Single run with m = 2, n = 4, k = 8 and qmax = 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29026
// Single run with m = 2, n = 4, k = 8 and cm_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
      xnn_init_qc8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
29040 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29041
29042
29043 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = mr = 2, n = nr = 8 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
29056
// Single run with m = 2, n = 8, k = 8 and cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
29070
// Sweeps n = 1..8 and m = 1..2 at k = 8, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(cur_m).n(cur_n).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29088
// Sweeps m = 1..2 with n = 8 and k = 8, one iteration per value of m.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(cur_m).n(8).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
29104
// Sweeps n = 1..8 with m = 2 and k = 8, one iteration per value of n.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(cur_n).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
29120
// Sweeps k = 1..7 with m = 2 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t cur_k = 1; cur_k <= 7; ++cur_k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(cur_k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
29135
// Sweeps k = 1..7, n = 1..8 and m = 1..2, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t cur_k = 1; cur_k <= 7; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29155
// Sweeps k = 9..15 with m = 2 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t cur_k = 9; cur_k <= 15; ++cur_k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(cur_k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
29170
// Sweeps k = 9..15, n = 1..8 and m = 1..2, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t cur_k = 9; cur_k <= 15; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29190
// Sweeps k = 16, 24, ..., 80 (step 8) with m = 2 and n = 8.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(cur_k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
29205
// Sweeps k = 16, 24, ..., 80 (step 8), n = 1..8 and m = 1..2,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29225
// Sweeps n = 9..15 and, for each n, k = 1, 10, 19, 28, 37 with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cur_n).k(cur_k);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29242
// Sweeps n = 9..15 and, for each n, k = 1, 10, 19, 28, 37 with m = 2
// and cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cur_n).k(cur_k);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29260
// Sweeps n = 9..15, k = 1, 10, 19, 28, 37 and m = 1..2,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29280
// Sweeps n = 16, 24 and, for each n, k = 1, 10, 19, 28, 37 with m = 2.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cur_n).k(cur_k);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29297
// Sweeps n = 16, 24 and, for each n, k = 1, 10, 19, 28, 37 with m = 2
// and cn_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cur_n).k(cur_k);
      tester.cn_stride(11);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29315
// Sweeps n = 16, 24, k = 1, 10, 19, 28, 37 and m = 1..2,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29335
// Sweeps k = 1, 10, 19, 28, 37 with m = 2, n = 8 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(cur_k);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
29351
// Sweeps k = 1, 10, 19, 28, 37, n = 1..8 and m = 1..2 with ks = 3,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29372
// Sweeps n = 9..15 and, for each n, k = 1, 10, 19, 28, 37 with m = 2 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cur_n).k(cur_k);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29390
// Sweeps n = 16, 24 and, for each n, k = 1, 10, 19, 28, 37 with m = 2 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(cur_n).k(cur_k);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29408
// Sweeps k = 1, 10, 19, 28, 37, n = 1..8 and m = 1..2 with cm_stride = 11,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.cm_stride(11).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
            xnn_init_qc8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29429
// Sweeps k = 1, 10, 19, 28, 37 with m = 2, n = 8, ks = 3 and a_offset = 83.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, a_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(cur_k);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
        xnn_init_qc8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
29446
// Sweeps k = 1, 10, 19, 28, 37 and zero_index = 0..1 with m = 2, n = 8,
// ks = 3 and a_offset = 83.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_mz = 0; cur_mz <= 1; ++cur_mz) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(8).k(cur_k);
      tester.ks(3).a_offset(83).zero_index(cur_mz);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
          xnn_init_qc8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29466
// Single run with m = 2, n = 8, k = 8 and qmin = 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
29480
// Single run with m = 2, n = 8, k = 8 and qmax = 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
29494
// Single run with m = 2, n = 8, k = 8 and cm_stride = 11.
TEST(QC8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
      xnn_init_qc8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
29508 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29509
29510
29511 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = mr = 4, n = nr = 16 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
29524
// Single run with m = 4, n = 16, k = 8 and cn_stride = 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
29538
// Sweeps n = 1..16 and m = 1..4 at k = 8, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(cur_m).n(cur_n).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29556
// Sweeps m = 1..4 with n = 16 and k = 8, one iteration per value of m.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(cur_m).n(16).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
29572
// Sweeps n = 1..16 with m = 4 and k = 8, one iteration per value of n.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(cur_n).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
29588
// Sweeps k = 1..7 with m = 4 and n = 16.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t cur_k = 1; cur_k <= 7; ++cur_k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(cur_k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
29603
// Sweeps k = 1..7, n = 1..16 and m = 1..4, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t cur_k = 1; cur_k <= 7; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29623
// Sweeps k = 9..15 with m = 4 and n = 16.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t cur_k = 9; cur_k <= 15; ++cur_k) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(cur_k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
29638
// Sweeps k = 9..15, n = 1..16 and m = 1..4, one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t cur_k = 9; cur_k <= 15; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29658
// Sweeps k = 16, 24, ..., 80 (step 8) with m = 4 and n = 16.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(cur_k);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
29673
// Sweeps k = 16, 24, ..., 80 (step 8), n = 1..16 and m = 1..4,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t cur_k = 16; cur_k <= 80; cur_k += 8) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29693
// Sweeps n = 17..31 and, for each n, k = 1, 10, 19, 28, 37 with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cur_n = 17; cur_n <= 31; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29710
// Sweeps n = 17..31 and, for each n, k = 1, 10, 19, 28, 37 with m = 4
// and cn_stride = 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cur_n = 17; cur_n <= 31; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29728
// Sweeps n = 17..31, k = 1, 10, 19, 28, 37 and m = 1..4,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cur_n = 17; cur_n <= 31; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29748
// Sweeps n = 32, 48 and, for each n, k = 1, 10, 19, 28, 37 with m = 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29765
// Sweeps n = 32, 48 and, for each n, k = 1, 10, 19, 28, 37 with m = 4
// and cn_stride = 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.cn_stride(19);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29783
// Sweeps n = 32, 48, k = 1, 10, 19, 28, 37 and m = 1..4,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29803
// Sweeps k = 1, 10, 19, 28, 37 with m = 4, n = 16 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(cur_k);
    tester.ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
29819
// Sweeps k = 1, 10, 19, 28, 37, n = 1..16 and m = 1..4 with ks = 3,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29840
// Sweeps n = 17..31 and, for each n, k = 1, 10, 19, 28, 37 with m = 4 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cur_n = 17; cur_n <= 31; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29858
// Sweeps n = 32, 48 and, for each n, k = 1, 10, 19, 28, 37 with m = 4 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t cur_n = 32; cur_n <= 48; cur_n += 16) {
    for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(cur_n).k(cur_k);
      tester.ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29876
// Sweeps k = 1, 10, 19, 28, 37, n = 1..16 and m = 1..4 with cm_stride = 19,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_n = 1; cur_n <= 16; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 4; ++cur_m) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(16).kr(8).sr(1);
        tester.m(cur_m).n(cur_n).k(cur_k);
        tester.cm_stride(19).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
            xnn_init_qc8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
29897
// Sweeps k = 1, 10, 19, 28, 37 with m = 4, n = 16, ks = 3 and a_offset = 163.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(16).kr(8).sr(1);
    tester.m(4).n(16).k(cur_k);
    tester.ks(3).a_offset(163);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
        xnn_init_qc8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
29914
// Sweeps k = 1, 10, 19, 28, 37 and zero_index = 0..3 with m = 4, n = 16,
// ks = 3 and a_offset = 163.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t cur_k = 1; cur_k <= 40; cur_k += 9) {
    for (uint32_t cur_mz = 0; cur_mz <= 3; ++cur_mz) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(16).kr(8).sr(1);
      tester.m(4).n(16).k(cur_k);
      tester.ks(3).a_offset(163).zero_index(cur_mz);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
          xnn_init_qc8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
29934
// Single run with m = 4, n = 16, k = 8 and qmin = 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
29948
// Single run with m = 4, n = 16, k = 8 and qmax = 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
29962
// Single run with m = 4, n = 16, k = 8 and cm_stride = 19.
TEST(QC8_IGEMM_MINMAX_FP32_4X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(16).kr(8).sr(1);
  tester.m(4).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
      xnn_init_qc8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
29976 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29977
29978
29979 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = mr = 1, n = nr = 4 and k = 8.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
29991
// Single run with m = 1, n = 4, k = 8 and cn_stride = 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
30004
// Sweeps n = 1..4 and m = 1 (the m loop has a single value) at k = 8,
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t cur_n = 1; cur_n <= 4; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(cur_m).n(cur_n).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30021
// Sweeps m over 1..1 with n = 4 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30036
// Sweeps n over 1..4 with m = 1 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30051
// Full 1x4 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30065
// Every (m, n) sub-tile of the 1x4 tile for every k in 1..7, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30084
// Full 1x4 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30098
// Every (m, n) sub-tile of the 1x4 tile for every k in 9..15, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30117
// Full 1x4 tile for k = 16, 24, ..., 80 (steps of 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30131
// Every (m, n) sub-tile of the 1x4 tile for k = 16, 24, ..., 80, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30150
// n in 5..7 (above nr = 4) crossed with k = 1, 10, ..., 37; m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30166
// n in 5..7 crossed with k = 1, 10, ..., 37; m = 1 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30183
// n in 5..7, k = 1, 10, ..., 37, and m in 1..1; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30202
// n = 8 and 12 (multiples of nr = 4) crossed with k = 1, 10, ..., 37; m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30218
// n = 8 and 12 crossed with k = 1, 10, ..., 37; m = 1 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30235
// n = 8 and 12, k = 1, 10, ..., 37, and m in 1..1; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30254
// Full 1x4 tile with ks = 3 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30269
// Every (m, n) sub-tile with ks = 3 for k = 1, 10, ..., 37; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30289
// n in 5..7 crossed with k = 1, 10, ..., 37; m = 1 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30306
// n = 8 and 12 crossed with k = 1, 10, ..., 37; m = 1 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30323
// Every (m, n) sub-tile with cm_stride = 7 for k = 1, 10, ..., 37; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30343
// Full 1x4 tile with ks = 3 and a_offset = 43 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(depth)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30359
// Same setup as a_offset (ks = 3, a_offset = 43), additionally sweeping
// zero_index over 0..0 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(4).k(depth)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30378
// Full 1x4 tile with k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
30391
// Full 1x4 tile with k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
30404
// Full 1x4 tile with k = 8 and cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
30417 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30418
30419
30420 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 1x4 tile with k = 8 and no extra tester overrides.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
30432
// Full 1x4 tile with k = 8 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
30445
// Sweeps every (m, n) sub-tile of the 1x4 tile at k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30462
// Sweeps m over 1..1 with n = 4 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30477
// Sweeps n over 1..4 with m = 1 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30492
// Full 1x4 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30506
// Every (m, n) sub-tile of the 1x4 tile for every k in 1..7, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30525
// Full 1x4 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30539
// Every (m, n) sub-tile of the 1x4 tile for every k in 9..15, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30558
// Full 1x4 tile for k = 16, 24, ..., 80 (steps of 8).
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30572
// Every (m, n) sub-tile of the 1x4 tile for k = 16, 24, ..., 80, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30591
// n in 5..7 (above nr = 4) crossed with k = 1, 10, ..., 37; m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30607
// n in 5..7 crossed with k = 1, 10, ..., 37; m = 1 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30624
// n in 5..7, k = 1, 10, ..., 37, and m in 1..1; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30643
// n = 8 and 12 (multiples of nr = 4) crossed with k = 1, 10, ..., 37; m = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30659
// n = 8 and 12 crossed with k = 1, 10, ..., 37; m = 1 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(7)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30676
// n = 8 and 12, k = 1, 10, ..., 37, and m in 1..1; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30695
// Full 1x4 tile with ks = 3 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(depth)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30710
// Every (m, n) sub-tile with ks = 3 for k = 1, 10, ..., 37; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30730
// n in 5..7 crossed with k = 1, 10, ..., 37; m = 1 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30747
// n = 8 and 12 crossed with k = 1, 10, ..., 37; m = 1 and ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30764
// Every (m, n) sub-tile with cm_stride = 7 for k = 1, 10, ..., 37; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                  xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30784
// Full 1x4 tile with ks = 3 and a_offset = 43 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(depth)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30800
// Same setup as a_offset (ks = 3, a_offset = 43), additionally sweeping
// zero_index over 0..0 for k = 1, 10, ..., 37.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(4).k(depth)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30819
// Full 1x4 tile with k = 8 and qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
30832
// Full 1x4 tile with k = 8 and qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
30845
// Full 1x4 tile with k = 8 and cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
30858 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
30859
30860
30861 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 2x4 tile with k = 8 and no extra tester overrides.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
30873
// Full 2x4 tile with k = 8 and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
30886
// Sweeps every (m, n) sub-tile of the 2x4 tile at k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(rows).n(cols).k(8)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30903
// Sweeps m over 1..2 with n = 4 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(rows).n(4).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30918
// Sweeps n over 1..4 with m = 2 and k = 8, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(cols).k(8)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30933
// Full 2x4 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30947
// Every (m, n) sub-tile of the 2x4 tile for every k in 1..7, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30966
// Full 2x4 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(depth)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
              xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
30980
// Every (m, n) sub-tile of the 2x4 tile for every k in 9..15, one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
                  xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30999
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  // k takes the multiples of 8 from 16 through 80, with m=2 and n=4.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31013
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  // k takes the multiples of 8 from 16 through 80; every (m, n) with
  // m in 1..2 and n in 1..4 runs once.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31032
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  // n sweeps 5..7; k steps from 1 to at most 40 in increments of 9; m=2.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31048
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  // Same n/k sweep as n_gt_4 (n in 5..7, k = 1, 10, ..., 37), with cn_stride=7.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31065
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  // n sweeps 5..7, k = 1, 10, ..., 37, m in 1..2; one iteration per combination.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31084
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  // n takes 8 and 12; k = 1, 10, ..., 37; m=2.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31100
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  // n takes 8 and 12; k = 1, 10, ..., 37; m=2; cn_stride=7.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31117
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  // n takes 8 and 12; k = 1, 10, ..., 37; m in 1..2; one iteration each.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31136
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  // k = 1, 10, ..., 37 with m=2, n=4 and ks=3.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31151
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  // k = 1, 10, ..., 37; every (m, n) with m in 1..2 and n in 1..4; ks=3;
  // one iteration per combination.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31171
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  // n sweeps 5..7; k = 1, 10, ..., 37; m=2; ks=3.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31188
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  // n takes 8 and 12; k = 1, 10, ..., 37; m=2; ks=3.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(nc).k(kc).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31205
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  // k = 1, 10, ..., 37; every (m, n) with m in 1..2 and n in 1..4;
  // cm_stride=7; one iteration per combination.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(1);
        tester.m(mc).n(nc).k(kc).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31225
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  // k = 1, 10, ..., 37 with m=2, n=4, ks=3 and a_offset=83.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(1);
    tester.m(2).n(4).k(kc).ks(3).a_offset(83);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31241
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  // k = 1, 10, ..., 37; zero_index takes 0 and 1; m=2, n=4, ks=3, a_offset=83.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(4).k(kc).ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31260
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  // Single run with m=2, n=4, k=8 and qmin=128.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31273
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  // Single run with m=2, n=4, k=8 and qmax=128.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31286
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  // Single run with m=2, n=4, k=8 and cm_stride=7.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31299 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31300
31301
31302 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single run with m=2, n=4, k=8.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31314
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Single run with m=2, n=4, k=8 and cn_stride=7.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31327
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // k=8; every (m, n) with m in 1..2 and n in 1..4 runs once.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(mc).n(nc).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31344
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // k=8, n=4; m sweeps 1..2, one iteration each.
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(mc).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31359
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // k=8, m=2; n sweeps 1..4, one iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(nc).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31374
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // k sweeps 1..7 with m=2 and n=4.
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31388
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // k sweeps 1..7; every (m, n) with m in 1..2 and n in 1..4 runs once.
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31407
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // k sweeps 9..15 with m=2 and n=4.
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31421
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // k sweeps 9..15; every (m, n) with m in 1..2 and n in 1..4 runs once.
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31440
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // k takes the multiples of 8 from 16 through 80, with m=2 and n=4.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31454
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // k takes the multiples of 8 from 16 through 80; every (m, n) with
  // m in 1..2 and n in 1..4 runs once.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31473
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // n sweeps 5..7; k = 1, 10, ..., 37; m=2.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31489
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // n sweeps 5..7; k = 1, 10, ..., 37; m=2; cn_stride=7.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31506
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // n sweeps 5..7, k = 1, 10, ..., 37, m in 1..2; one iteration per combination.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31525
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  // n takes 8 and 12; k = 1, 10, ..., 37; m=2.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31541
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  // n takes 8 and 12; k = 1, 10, ..., 37; m=2; cn_stride=7.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31558
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  // n takes 8 and 12; k = 1, 10, ..., 37; m in 1..2; one iteration each.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31577
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  // k = 1, 10, ..., 37 with m=2, n=4 and ks=3.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31592
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  // k = 1, 10, ..., 37; every (m, n) with m in 1..2 and n in 1..4; ks=3;
  // one iteration per combination.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31612
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  // n sweeps 5..7; k = 1, 10, ..., 37; m=2; ks=3.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31629
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  // n takes 8 and 12; k = 1, 10, ..., 37; m=2; ks=3.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nc).k(kc).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31646
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  // k = 1, 10, ..., 37; every (m, n) with m in 1..2 and n in 1..4;
  // cm_stride=7; one iteration per combination.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mc).n(nc).k(kc).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31666
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  // k = 1, 10, ..., 37 with m=2, n=4, ks=3 and a_offset=83.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kc).ks(3).a_offset(83);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31682
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  // k = 1, 10, ..., 37; zero_index takes 0 and 1; m=2, n=4, ks=3, a_offset=83.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(4).k(kc).ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31701
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  // Single run with m=2, n=4, k=8 and qmin=128.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31714
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  // Single run with m=2, n=4, k=8 and qmax=128.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31727
TEST(QC8_IGEMM_MINMAX_FP32_2X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  // Single run with m=2, n=4, k=8 and cm_stride=7.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31740 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
31741
31742
31743 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single run with m=2, n=4, k=8.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31755
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Single run with m=2, n=4, k=8 and cn_stride=7.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
31768
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // k=8; every (m, n) with m in 1..2 and n in 1..4 runs once.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(mc).n(nc).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31785
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // k=8, n=4; m sweeps 1..2, one iteration each.
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(mc).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31800
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // k=8, m=2; n sweeps 1..4, one iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(nc).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31815
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // k sweeps 1..7 with m=2 and n=4.
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31829
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // k sweeps 1..7; every (m, n) with m in 1..2 and n in 1..4 runs once.
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31848
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // k sweeps 9..15 with m=2 and n=4.
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31862
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // k sweeps 9..15; every (m, n) with m in 1..2 and n in 1..4 runs once.
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31881
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // k takes the multiples of 8 from 16 through 80, with m=2 and n=4.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(kc);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
31895
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // k takes the multiples of 8 from 16 through 80; every (m, n) with
  // m in 1..2 and n in 1..4 runs once.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31914
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // n sweeps 5..7; k = 1, 10, ..., 37; m=2.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31930
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // n sweeps 5..7; k = 1, 10, ..., 37; m=2; cn_stride=7.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(nc).k(kc).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31947
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // n sweeps 5..7, k = 1, 10, ..., 37, m in 1..2; one iteration per combination.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(mc).n(nc).k(kc).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31966
// n = 8 and n = 12 (multiples of nr = 4) at m = 2; k steps 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31982
// n = 8 and n = 12 at m = 2 with cn_stride set to 7; k steps 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31999
// n = 8 and n = 12 crossed with k in {1, 10, 19, 28, 37} and m in [1, 2];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32018
// Full m = 2, n = 4 tile with ks set to 3; k steps 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_size).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
32033
// ks = 3 with k in {1, 10, 19, 28, 37}, every n in [1, 4] and every m in
// [1, 2]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32053
// n = 5, 6, 7 at m = 2 with ks set to 3; k steps 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32070
// n = 8 and n = 12 at m = 2 with ks set to 3; k steps 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32087
// cm_stride = 7 with k in {1, 10, 19, 28, 37}, every n in [1, 4] and every m
// in [1, 2]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32107
// Full m = 2, n = 4 tile with ks = 3 and a_offset = 83; k steps 1, 10, 19,
// 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
32123
// Full m = 2, n = 4 tile with ks = 3 and a_offset = 83, while zero_index
// takes each value in [0, 2); k steps 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(4).k(k_size);
      tester.ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32142
// Full m = 2, n = 4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
32155
// Full m = 2, n = 4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
32168
// Full m = 2, n = 4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
32181 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32182
32183
32184 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full m = 3, n = 4 tile at k = 8; no other tester options are set.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
32196
// Full m = 3, n = 4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
32209
// k fixed at 8; every (n, m) pair with n in [1, 4] and m in [1, 3], one
// iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(m_size).n(n_size).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32226
// k = 8 and n = 4 held fixed while m runs over [1, 3]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(m_size).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
32241
// k = 8 and m = 3 held fixed while n runs over [1, 4]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(n_size).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
32256
// Every k from 1 through 7 on the full m = 3, n = 4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
32270
// Every k from 1 through 7, crossed with n in [1, 4] and m in [1, 3];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32289
// Every k from 9 through 15 on the full m = 3, n = 4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
32303
// Every k from 9 through 15, crossed with n in [1, 4] and m in [1, 3];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32322
// k in multiples of 8 from 16 through 80 on the full m = 3, n = 4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
32336
// k in multiples of 8 from 16 through 80, crossed with every n in [1, 4] and
// every m in [1, 3]; each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32355
// n = 5, 6, 7 (above nr = 4) at m = 3, with k stepping 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32371
// n = 5, 6, 7 at m = 3 with cn_stride set to 7; k steps 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32388
// n = 5, 6, 7 crossed with k in {1, 10, 19, 28, 37} and m in [1, 3];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32407
// n = 8 and n = 12 (multiples of nr = 4) at m = 3; k steps 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32423
// n = 8 and n = 12 at m = 3 with cn_stride set to 7; k steps 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32440
// n = 8 and n = 12 crossed with k in {1, 10, 19, 28, 37} and m in [1, 3];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32459
// Full m = 3, n = 4 tile with ks set to 3; k steps 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
32474
// ks = 3 with k in {1, 10, 19, 28, 37}, every n in [1, 4] and every m in
// [1, 3]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32494
// n = 5, 6, 7 at m = 3 with ks set to 3; k steps 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32511
// n = 8 and n = 12 at m = 3 with ks set to 3; k steps 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_size).k(k_size).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32528
// cm_stride = 7 with k in {1, 10, 19, 28, 37}, every n in [1, 4] and every m
// in [1, 3]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_size).n(n_size).k(k_size).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32548
// Full m = 3, n = 4 tile with ks = 3 and a_offset = 127; k steps 1, 10, 19,
// 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.ks(3).a_offset(127);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
32564
// Full m = 3, n = 4 tile with ks = 3 and a_offset = 127, while zero_index
// takes each value in [0, 3); k steps 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(4).k(k_size);
      tester.ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32583
// Full m = 3, n = 4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
32596
// Full m = 3, n = 4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
32609
// Full m = 3, n = 4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
32622 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
32623
32624
32625 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full m = 3, n = 4 tile at k = 8; no other tester options are set.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
32637
// Full m = 3, n = 4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8).cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
32650
// k fixed at 8; every (n, m) pair with n in [1, 4] and m in [1, 3], one
// iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32667
// k = 8 and n = 4 held fixed while m runs over [1, 3]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(m_size).n(4).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
32682
// k = 8 and m = 3 held fixed while n runs over [1, 4]; one iteration each.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(n_size).k(8).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
32697
// Every k from 1 through 7 on the full m = 3, n = 4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
32711
// Every k from 1 through 7, crossed with n in [1, 4] and m in [1, 3];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32730
// Every k from 9 through 15 on the full m = 3, n = 4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
32744
// Every k from 9 through 15, crossed with n in [1, 4] and m in [1, 3];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32763
// k in multiples of 8 from 16 through 80 on the full m = 3, n = 4 tile.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
32777
// k in multiples of 8 from 16 through 80, crossed with every n in [1, 4] and
// every m in [1, 3]; each configuration runs a single iteration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32796
// n = 5, 6, 7 (above nr = 4) at m = 3, with k stepping 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32812
// n = 5, 6, 7 at m = 3 with cn_stride set to 7; k steps 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32829
// n = 5, 6, 7 crossed with k in {1, 10, 19, 28, 37} and m in [1, 3];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32848
// n = 8 and n = 12 (multiples of nr = 4) at m = 3; k steps 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32864
// n = 8 and n = 12 at m = 3 with cn_stride set to 7; k steps 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32881
// n = 8 and n = 12 crossed with k in {1, 10, 19, 28, 37} and m in [1, 3];
// one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32900
// Full m = 3, n = 4 tile with ks set to 3; k steps 1, 10, 19, 28, 37.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
32915
// ks = 3 with k in {1, 10, 19, 28, 37}, every n in [1, 4] and every m in
// [1, 3]; one iteration per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32935
// ks set to 3 with n in 5..7 and m = 3, for k in 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32952
// ks set to 3 with n in {8, 12} and m = 3, for k in 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32969
// cm_stride set to 7 with every m in 1..3 and n in 1..4, for k in 1..40 in
// steps of 9; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32989
// Full 3x4 tile with ks set to 3 and a_offset set to 127, for k in 1..40 in
// steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33005
// Full 3x4 tile with ks set to 3 and a_offset set to 127, trying each
// zero_index in 0..2 for k in 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t z_idx = 0; z_idx < 3; ++z_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(z_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33024
// Full 3x4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33037
// Full 3x4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33050
// Full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33063 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
33064
33065
33066 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 4x4 tile at k = 8 with no extra tester options.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33078
// Full 4x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33091
// k = 8 with every n in 1..4 and m in 1..4; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33108
// k = 8 and n = 4 with every m in 1..4; one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33123
// k = 8 and m = 4 with every n in 1..4; one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33138
// Full 4x4 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33152
// Every k in 1..7 combined with every n in 1..4 and m in 1..4; one
// iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33171
// Full 4x4 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33185
// Every k in 9..15 combined with every n in 1..4 and m in 1..4; one
// iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33204
// Full 4x4 tile for k in 16..80 in steps of 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33218
// k in 16..80 in steps of 8 combined with every n in 1..4 and m in 1..4;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33237
// n in 5..7 with m = 4, for k in 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33253
// n in 5..7 with m = 4 and cn_stride set to 7, for k in 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33270
// n in 5..7, k in 1..40 in steps of 9 and every m in 1..4; one iteration
// per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33289
// n in {8, 12} with m = 4, for k in 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33305
// n in {8, 12} with m = 4 and cn_stride set to 7, for k in 1..40 in steps
// of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33322
// n in {8, 12}, k in 1..40 in steps of 9 and every m in 1..4; one iteration
// per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33341
// Full 4x4 tile with ks set to 3, for k in 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33356
// ks set to 3 with every m in 1..4 and n in 1..4, for k in 1..40 in steps
// of 9; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33376
// ks set to 3 with n in 5..7 and m = 4, for k in 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33393
// ks set to 3 with n in {8, 12} and m = 4, for k in 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33410
// cm_stride set to 7 with every m in 1..4 and n in 1..4, for k in 1..40 in
// steps of 9; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33430
// Full 4x4 tile with ks set to 3 and a_offset set to 163, for k in 1..40 in
// steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33446
// Full 4x4 tile with ks set to 3 and a_offset set to 163, trying each
// zero_index in 0..3 for k in 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t z_idx = 0; z_idx < 4; ++z_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(z_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33465
// Full 4x4 tile at k = 8 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33478
// Full 4x4 tile at k = 8 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33491
// Full 4x4 tile at k = 8 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33504 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
33505
33506
33507 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 4x4 tile at k = 8 with no extra tester options.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33519
// Full 4x4 tile at k = 8 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33532
// k = 8 with every n in 1..4 and m in 1..4; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33549
// k = 8 and n = 4 with every m in 1..4; one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33564
// k = 8 and m = 4 with every n in 1..4; one iteration per value.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33579
// Full 4x4 tile for every k in 1..7.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33593
// Every k in 1..7 combined with every n in 1..4 and m in 1..4; one
// iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33612
// Full 4x4 tile for every k in 9..15.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33626
// Every k in 9..15 combined with every n in 1..4 and m in 1..4; one
// iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33645
// Full 4x4 tile for k in 16..80 in steps of 8.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33659
// k in 16..80 in steps of 8 combined with every n in 1..4 and m in 1..4;
// one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33678
// n in 5..7 with m = 4, for k in 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33694
// n in 5..7 with m = 4 and cn_stride set to 7, for k in 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33711
// n in 5..7, k in 1..40 in steps of 9 and every m in 1..4; one iteration
// per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33730
// n in {8, 12} with m = 4, for k in 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33746
// n in {8, 12} with m = 4 and cn_stride set to 7, for k in 1..40 in steps
// of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33763
// n in {8, 12}, k in 1..40 in steps of 9 and every m in 1..4; one iteration
// per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33782
// Full 4x4 tile with ks set to 3, for k in 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33797
// ks set to 3 with every m in 1..4 and n in 1..4, for k in 1..40 in steps
// of 9; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33817
// ks set to 3 with n in 5..7 and m = 4, for k in 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33834
// ks set to 3 with n in {8, 12} and m = 4, for k in 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33851
// cm_stride set to 7 with every m in 1..4 and n in 1..4, for k in 1..40 in
// steps of 9; one iteration per combination.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33871
// Full 4x4 tile with ks set to 3 and a_offset set to 163, for k in 1..40 in
// steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(k_size)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
33887
// Full 4x4 tile with ks set to 3 and a_offset set to 163, trying each
// zero_index in 0..3 for k in 1..40 in steps of 9.
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t z_idx = 0; z_idx < 4; ++z_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_size)
        .ks(3)
        .a_offset(163)
        .zero_index(z_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33906
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  // Full 4x4 tile at k = 8 with qmin(128).
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33919
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  // Full 4x4 tile at k = 8 with qmax(128).
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33932
TEST(QC8_IGEMM_MINMAX_FP32_4X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  // Full 4x4 tile at k = 8 with cm_stride(7).
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(2).sr(4)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33945 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
33946
33947
33948 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  // Full 4x4 tile at k = 8.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33960
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  // Full 4x4 tile at k = 8 with cn_stride(7).
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
33973
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  // k = 8 for every (m, n) in [1, 4] x [1, 4], one iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33990
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  // k = 8, n = 4, m swept over 1..4, one iteration each.
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34005
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  // k = 8, m = 4, n swept over 1..4, one iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34020
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  // Full 4x4 tile for k = 1..7.
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34034
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  // k = 1..7 for every (m, n) in [1, 4] x [1, 4], one iteration each.
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34053
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  // Full 4x4 tile for k = 9..15.
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34067
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  // k = 9..15 for every (m, n) in [1, 4] x [1, 4], one iteration each.
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34086
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  // Full 4x4 tile for k = 16, 24, ..., 80.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34100
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  // k = 16, 24, ..., 80 for every (m, n) in [1, 4] x [1, 4], one iteration each.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34119
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  // m = 4, n = 5..7, k = 1, 10, 19, 28, 37.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34135
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  // m = 4, n = 5..7, k = 1, 10, 19, 28, 37, with cn_stride(7).
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34152
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  // n = 5..7, k = 1, 10, 19, 28, 37, m = 1..4, one iteration each.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34171
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  // m = 4, n = 8 and 12, k = 1, 10, 19, 28, 37.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34187
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  // m = 4, n = 8 and 12, k = 1, 10, 19, 28, 37, with cn_stride(7).
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34204
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  // n = 8 and 12, k = 1, 10, 19, 28, 37, m = 1..4, one iteration each.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34223
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  // Full 4x4 tile with ks(3) for k = 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34238
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  // ks(3), k = 1, 10, 19, 28, 37, every (m, n) in [1, 4] x [1, 4], one iteration each.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34258
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  // m = 4, n = 5..7, k = 1, 10, 19, 28, 37, with ks(3).
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34275
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  // m = 4, n = 8 and 12, k = 1, 10, 19, 28, 37, with ks(3).
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34292
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  // k = 1, 10, 19, 28, 37; every (m, n) in [1, 4] x [1, 4]; cm_stride(7), one iteration each.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34312
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  // Full 4x4 tile with ks(3) and a_offset(163) for k = 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34328
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  // Same setup as the a_offset case, additionally sweeping zero_index over 0..3.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34347
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  // Full 4x4 tile at k = 8 with qmin(128).
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34360
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  // Full 4x4 tile at k = 8 with qmax(128).
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34373
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  // Full 4x4 tile at k = 8 with cm_stride(7).
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34386 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
34387
34388
34389 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Full 4x4 tile at k = 8.
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34401
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Full 4x4 tile at k = 8 with cn_stride(7).
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34414
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // k = 8 for every (m, n) in [1, 4] x [1, 4], one iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34431
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // k = 8, n = 4, m swept over 1..4, one iteration each.
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34446
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // k = 8, m = 4, n swept over 1..4, one iteration each.
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34461
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // Full 4x4 tile for k = 1..7.
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34475
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // k = 1..7 for every (m, n) in [1, 4] x [1, 4], one iteration each.
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34494
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // Full 4x4 tile for k = 9..15.
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34508
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // k = 9..15 for every (m, n) in [1, 4] x [1, 4], one iteration each.
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34527
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // Full 4x4 tile for k = 16, 24, ..., 80.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34541
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // k = 16, 24, ..., 80 for every (m, n) in [1, 4] x [1, 4], one iteration each.
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34560
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // m = 4, n = 5..7, k = 1, 10, 19, 28, 37.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34576
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // m = 4, n = 5..7, k = 1, 10, 19, 28, 37, with cn_stride(7).
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34593
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // n = 5..7, k = 1, 10, 19, 28, 37, m = 1..4, one iteration each.
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34612
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  // m = 4, n = 8 and 12, k = 1, 10, 19, 28, 37.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34628
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  // m = 4, n = 8 and 12, k = 1, 10, 19, 28, 37, with cn_stride(7).
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34645
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  // n = 8 and 12, k = 1, 10, 19, 28, 37, m = 1..4, one iteration each.
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34664
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  // Full 4x4 tile with ks(3) for k = 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34679
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  // ks(3), k = 1, 10, 19, 28, 37, every (m, n) in [1, 4] x [1, 4], one iteration each.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34699
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  // m = 4, n = 5..7, k = 1, 10, 19, 28, 37, with ks(3).
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34716
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  // m = 4, n = 8 and 12, k = 1, 10, 19, 28, 37, with ks(3).
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34733
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  // k = 1, 10, 19, 28, 37; every (m, n) in [1, 4] x [1, 4]; cm_stride(7), one iteration each.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34753
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  // Full 4x4 tile with ks(3) and a_offset(163) for k = 1, 10, 19, 28, 37.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
34769
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  // Same setup as the a_offset case, additionally sweeping zero_index over 0..3.
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34788
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  // Full 4x4 tile at k = 8 with qmin(128).
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34801
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  // Full 4x4 tile at k = 8 with qmax(128).
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34814
TEST(QC8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  // Full 4x4 tile at k = 8 with cm_stride(7).
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(8).sr(1)
    .m(4).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qc8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
34827 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
34828
34829
34830 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1) {
  // Full 1x2 tile at k = 1.
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
34842
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cn) {
  // Full 1x2 tile at k = 1 with cn_stride(5).
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .cn_stride(5)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
34855
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile) {
  // k = 1 for n = 1..2 and m = 1, one iteration each.
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34872
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  // k = 1, n = 2; the m loop covers only m = 1. One iteration.
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(mc).n(2).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
34887
// k = 1, m fixed at 1, n swept over [1, 2]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols < 3; ++cols) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(cols).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
34902
// Full 1x2 tile for each k in [2, 9].
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
34916
// Each k in [2, 9] crossed with n in [1, 2] and m in [1, 1]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    for (uint32_t cols = 1; cols < 3; ++cols) {
      for (uint32_t rows = 1; rows < 2; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34935
// n = 3 (the only value in [3, 4)), m = 1, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t cols = 3; cols <= 3; ++cols) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cols).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34951
// n = 3, m = 1, k in {1, 3, 5}, with cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t cols = 3; cols <= 3; ++cols) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cols).k(kc)
        .cn_stride(5)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34968
// n = 3, k in {1, 3, 5}, m in [1, 1]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t cols = 3; cols <= 3; ++cols) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      for (uint32_t rows = 1; rows < 2; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34987
// n in {4, 6}, m = 1, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t cols = 4; cols < 7; cols += 2) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cols).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35003
// n in {4, 6}, m = 1, k in {1, 3, 5}, with cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t cols = 4; cols < 7; cols += 2) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cols).k(kc)
        .cn_stride(5)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35020
// n in {4, 6}, k in {1, 3, 5}, m in [1, 1]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t cols = 4; cols < 7; cols += 2) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      for (uint32_t rows = 1; rows < 2; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35039
// Full 1x2 tile with ks = 3 for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, small_kernel) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(kc)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
35054
// ks = 3 with k in {1, 3, 5}, n in [1, 2], m in [1, 1]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    for (uint32_t cols = 1; cols < 3; ++cols) {
      for (uint32_t rows = 1; rows < 2; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35074
// n = 3, m = 1, k in {1, 3, 5}, with ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t cols = 3; cols <= 3; ++cols) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cols).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35091
// n in {4, 6}, m = 1, k in {1, 3, 5}, with ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t cols = 4; cols < 7; cols += 2) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(cols).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35108
// cm_stride = 5 with k in {1, 3, 5}, n in [1, 2], m in [1, 1]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    for (uint32_t cols = 1; cols < 3; ++cols) {
      for (uint32_t rows = 1; rows < 2; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(kc)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35128
// Full 1x2 tile, ks = 3, a_offset = 7, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, a_offset) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(kc)
      .ks(3)
      .a_offset(7)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
35144
// Full 1x2 tile, ks = 3, a_offset = 7, k in {1, 3, 5}; zero_index takes the single value 0.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, zero) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(kc)
        .ks(3)
        .a_offset(7)
        .zero_index(zi)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35163
// Full 1x2 tile, k = 1, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
35176
// Full 1x2 tile, k = 1, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
35189
// Full 1x2 tile, k = 1, with cm_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .cm_stride(5)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
35202 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
35203
35204
35205 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 2x2 tile (m = 2, n = 2) with k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
35217
// Full 2x2 tile, k = 1, with cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .cn_stride(5)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
35230
// k = 1 for every n in [1, 2] crossed with m in [1, 2]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols < 3; ++cols) {
    for (uint32_t rows = 1; rows < 3; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35247
// k = 1, n fixed at 2, m swept over [1, 2]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows < 3; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(rows).n(2).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
35262
// k = 1, m fixed at 2, n swept over [1, 2]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols < 3; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(cols).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
35277
// Full 2x2 tile for each k in [2, 9].
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
35291
// Each k in [2, 9] crossed with n in [1, 2] and m in [1, 2]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    for (uint32_t cols = 1; cols < 3; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35310
// n = 3 (the only value in [3, 4)), m = 2, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t cols = 3; cols <= 3; ++cols) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cols).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35326
// n = 3, m = 2, k in {1, 3, 5}, with cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t cols = 3; cols <= 3; ++cols) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cols).k(kc)
        .cn_stride(5)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35343
// n = 3, k in {1, 3, 5}, m in [1, 2]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t cols = 3; cols <= 3; ++cols) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35362
// n in {4, 6}, m = 2, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t cols = 4; cols < 7; cols += 2) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cols).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35378
// n in {4, 6}, m = 2, k in {1, 3, 5}, with cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t cols = 4; cols < 7; cols += 2) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cols).k(kc)
        .cn_stride(5)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35395
// n in {4, 6}, k in {1, 3, 5}, m in [1, 2]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t cols = 4; cols < 7; cols += 2) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35414
// Full 2x2 tile with ks = 3 for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, small_kernel) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(kc)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
35429
// ks = 3 with k in {1, 3, 5}, n in [1, 2], m in [1, 2]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    for (uint32_t cols = 1; cols < 3; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35449
// n = 3, m = 2, k in {1, 3, 5}, with ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t cols = 3; cols <= 3; ++cols) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cols).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35466
// n in {4, 6}, m = 2, k in {1, 3, 5}, with ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t cols = 4; cols < 7; cols += 2) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cols).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35483
// cm_stride = 5 with k in {1, 3, 5}, n in [1, 2], m in [1, 2]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    for (uint32_t cols = 1; cols < 3; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(kc)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35503
// Full 2x2 tile, ks = 3, a_offset = 13, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, a_offset) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(kc)
      .ks(3)
      .a_offset(13)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
35519
// Full 2x2 tile, ks = 3, a_offset = 13, k in {1, 3, 5}; zero_index swept over {0, 1}.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, zero) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    for (uint32_t zi = 0; zi <= 1; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(kc)
        .ks(3)
        .a_offset(13)
        .zero_index(zi)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35538
// Full 2x2 tile, k = 1, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
35551
// Full 2x2 tile, k = 1, with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .qmax(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
35564
// Full 2x2 tile, k = 1, with cm_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .cm_stride(5)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
35577 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
35578
35579
35580 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Full 2x4 tile (m = 2, n = 4) with k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
35592
// Full 2x4 tile, k = 1, with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cn_stride(7)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
35605
// k = 1 for every n in [1, 4] crossed with m in [1, 2]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols < 5; ++cols) {
    for (uint32_t rows = 1; rows < 3; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35622
// k = 1, n fixed at 4, m swept over [1, 2]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows < 3; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(rows).n(4).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
35637
// k = 1, m fixed at 2, n swept over [1, 4]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols < 5; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(cols).k(1)
      .iterations(1)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
35652
// Full 2x4 tile for each k in [2, 9].
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
35666
// Each k in [2, 9] crossed with n in [1, 4] and m in [1, 2]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35685
// n in [5, 7], m = 2, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35701
// n in [5, 7], m = 2, k in {1, 3, 5}, with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35718
// n in [5, 7], k in {1, 3, 5}, m in [1, 2]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35737
// n in {8, 12}, m = 2, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(kc)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35753
// n in {8, 12}, m = 2, k in {1, 3, 5}, with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35770
// n in {8, 12}, k in {1, 3, 5}, m in [1, 2]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(kc)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35789
// Full 2x4 tile with ks = 3 for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, small_kernel) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
35804
// ks = 3 with k in {1, 3, 5}, n in [1, 4], m in [1, 2]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35824
// n in [5, 7], m = 2, k in {1, 3, 5}, with ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35841
// n in {8, 12}, m = 2, k in {1, 3, 5}, with ks = 3.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t kc = 1; kc < 6; kc += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(kc)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35858
// cm_stride = 7 with k in {1, 3, 5}, n in [1, 4], m in [1, 2]; iterations = 1 per case.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 3; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35878
// Full 2x4 tile, ks = 3, a_offset = 13, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, a_offset) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .a_offset(13)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
35894
// Full 2x4 tile, ks = 3, a_offset = 13, k in {1, 3, 5}; zero_index swept over {0, 1}.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, zero) {
  for (size_t kc = 1; kc < 6; kc += 2) {
    for (uint32_t zi = 0; zi <= 1; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(13)
        .zero_index(zi)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35913
// Full 2x4 tile, k = 1, with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmin(128)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
35926
// Full 2x4 tile at k = 1 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
35939
// Full 2x4 tile at k = 1 with cm_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1).cm_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
35952 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
35953
35954
// Full 1x2 tile (m = 1, n = 2) at k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
35966
// Full 1x2 tile at k = 1 with cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1).cn_stride(5);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
35979
// At k = 1, visits every (n, m) pair with n in [1, 2] and m in [1, 1];
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t nn = 1; nn <= 2; ++nn) {
    for (uint32_t mm = 1; mm <= 1; ++mm) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(mm).n(nn).k(1).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35996
// At k = 1 and n = 2, loops m over [1, 1]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mm = 1; mm <= 1; ++mm) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(mm).n(2).k(1).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36011
// At k = 1 and m = 1, loops n over [1, 2]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nn = 1; nn <= 2; ++nn) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(nn).k(1).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36026
// Full 1x2 tile for every k in [2, 10).
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t kk = 2; kk < 10; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kk);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36040
// For every k in [2, 10), visits n in [1, 2] and m in [1, 1]; one iteration
// per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t kk = 2; kk < 10; ++kk) {
    for (uint32_t nn = 1; nn <= 2; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36059
// n drawn from [3, 4) (i.e. n = 3) with m = 1 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t nn = 3; nn < 4; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36075
// n drawn from [3, 4) with m = 1, k in {1, 3, 5} and cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t nn = 3; nn < 4; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nn).k(kk).cn_stride(5);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36092
// n drawn from [3, 4), k in {1, 3, 5}, m in [1, 1]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t nn = 3; nn < 4; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36111
// n in {4, 6} with m = 1 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t nn = 4; nn <= 6; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36127
// n in {4, 6} with m = 1, k in {1, 3, 5} and cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t nn = 4; nn <= 6; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nn).k(kk).cn_stride(5);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36144
// n in {4, 6}, k in {1, 3, 5}, m in [1, 1]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t nn = 4; nn <= 6; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36163
// Full 1x2 tile with ks = 3, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, small_kernel) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kk).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36178
// With ks = 3: k in {1, 3, 5}, n in [1, 2], m in [1, 1]; one iteration per
// shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 2; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36198
// With ks = 3: n drawn from [3, 4), m = 1, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t nn = 3; nn < 4; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nn).k(kk).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36215
// With ks = 3: n in {4, 6}, m = 1, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t nn = 4; nn <= 6; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nn).k(kk).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36232
// With cm_stride = 5: k in {1, 3, 5}, n in [1, 2], m in [1, 1]; one
// iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 2; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).cm_stride(5).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36252
// Full 1x2 tile with ks = 3 and a_offset = 7, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, a_offset) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kk).ks(3).a_offset(7);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36268
// Full 1x2 tile with ks = 3 and a_offset = 7; for each k in {1, 3, 5} the
// zero_index is set to every value in [0, 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, zero) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(2).k(kk).ks(3).a_offset(7).zero_index(zi);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36287
// Full 1x2 tile at k = 1 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
36300
// Full 1x2 tile at k = 1 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
36313
// Full 1x2 tile at k = 1 with cm_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_FMAGIC, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1).cm_stride(5);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
36326
36327
// Full 1x2 tile (m = 1, n = 2) at k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
36339
// Full 1x2 tile at k = 1 with cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1).cn_stride(5);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
36352
// At k = 1, visits every (n, m) pair with n in [1, 2] and m in [1, 1];
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t nn = 1; nn <= 2; ++nn) {
    for (uint32_t mm = 1; mm <= 1; ++mm) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(mm).n(nn).k(1).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36369
// At k = 1 and n = 2, loops m over [1, 1]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mm = 1; mm <= 1; ++mm) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(mm).n(2).k(1).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36384
// At k = 1 and m = 1, loops n over [1, 2]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nn = 1; nn <= 2; ++nn) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(nn).k(1).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36399
// Full 1x2 tile for every k in [2, 10).
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t kk = 2; kk < 10; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kk);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36413
// For every k in [2, 10), visits n in [1, 2] and m in [1, 1]; one iteration
// per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t kk = 2; kk < 10; ++kk) {
    for (uint32_t nn = 1; nn <= 2; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36432
// n drawn from [3, 4) (i.e. n = 3) with m = 1 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t nn = 3; nn < 4; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36448
// n drawn from [3, 4) with m = 1, k in {1, 3, 5} and cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t nn = 3; nn < 4; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nn).k(kk).cn_stride(5);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36465
// n drawn from [3, 4), k in {1, 3, 5}, m in [1, 1]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t nn = 3; nn < 4; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36484
// n in {4, 6} with m = 1 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t nn = 4; nn <= 6; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36500
// n in {4, 6} with m = 1, k in {1, 3, 5} and cn_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t nn = 4; nn <= 6; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nn).k(kk).cn_stride(5);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36517
// n in {4, 6}, k in {1, 3, 5}, m in [1, 1]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t nn = 4; nn <= 6; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36536
// Full 1x2 tile with ks = 3, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kk).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36551
// With ks = 3: k in {1, 3, 5}, n in [1, 2], m in [1, 1]; one iteration per
// shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 2; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36571
// With ks = 3: n drawn from [3, 4), m = 1, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  for (uint32_t nn = 3; nn < 4; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nn).k(kk).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36588
// With ks = 3: n in {4, 6}, m = 1, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  for (uint32_t nn = 4; nn <= 6; nn += 2) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(nn).k(kk).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36605
// With cm_stride = 5: k in {1, 3, 5}, n in [1, 2], m in [1, 1]; one
// iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 2; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(2).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).cm_stride(5).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36625
// Full 1x2 tile with ks = 3 and a_offset = 7, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, a_offset) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(2).kr(1).sr(1);
    tester.m(1).n(2).k(kk).ks(3).a_offset(7);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36641
// Full 1x2 tile with ks = 3 and a_offset = 7; for each k in {1, 3, 5} the
// zero_index is set to every value in [0, 1).
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, zero) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(2).kr(1).sr(1);
      tester.m(1).n(2).k(kk).ks(3).a_offset(7).zero_index(zi);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36660
// Full 1x2 tile at k = 1 with qmin set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmin) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1).qmin(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
36673
// Full 1x2 tile at k = 1 with qmax set to 128.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmax) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1).qmax(128);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
36686
// Full 1x2 tile at k = 1 with cm_stride set to 5.
TEST(QC8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(2).kr(1).sr(1);
  tester.m(1).n(2).k(1).cm_stride(5);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
36699
36700
// Full 1x4 tile (m = 1, n = 4) at k = 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
36712
// Full 1x4 tile at k = 1 with cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cn) {
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1).cn_stride(7);
  tester.Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
36725
// At k = 1, visits every (n, m) pair with n in [1, 4] and m in [1, 1];
// one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 1; ++mm) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(mm).n(nn).k(1).iterations(1);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36742
// At k = 1 and n = 4, loops m over [1, 1]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t mm = 1; mm <= 1; ++mm) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(mm).n(4).k(1).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36757
// At k = 1 and m = 1, loops n over [1, 4]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(nn).k(1).iterations(1);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36772
// Full 1x4 tile for every k in [2, 10).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t kk = 2; kk < 10; ++kk) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(4).k(kk);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36786
// For every k in [2, 10), visits n in [1, 4] and m in [1, 1]; one iteration
// per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t kk = 2; kk < 10; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36805
// n in [5, 8) with m = 1 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36821
// n in [5, 8) with m = 1, k in {1, 3, 5} and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(nn).k(kk).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36838
// n in [5, 8), k in {1, 3, 5}, m in [1, 1]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36857
// n in {8, 12} with m = 1 and k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(nn).k(kk);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36873
// n in {8, 12} with m = 1, k in {1, 3, 5} and cn_stride set to 7.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(nn).k(kk).cn_stride(7);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36890
// n in {8, 12}, k in {1, 3, 5}, m in [1, 1]; one iteration per shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36909
// Full 1x4 tile with ks = 3, for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, small_kernel) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(4).k(kk).ks(3);
    tester.Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
36924
// With ks = 3: k in {1, 3, 5}, n in [1, 4], m in [1, 1]; one iteration per
// shape.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(mm).n(nn).k(kk).ks(3).iterations(1);
        tester.Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36944
// With ks = 3: n in [5, 8), m = 1, k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(nn).k(kk).ks(3);
      tester.Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36961
// 1x4 scalar_fmagic kernel with ks(3): n in {8, 12} (multiples of nr = 4)
// crossed with k in {1, 3, 5}, m fixed at 1.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
36978
// 1x4 scalar_fmagic kernel with cm_stride(7): k in {1, 3, 5} crossed with
// n in 1..4; the m loop runs once with m = 1. iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36998
// 1x4 scalar_fmagic kernel: full tile (m = 1, n = 4) with ks(3) and
// a_offset(7), for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(7)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
37014
// 1x4 scalar_fmagic kernel: full tile with ks(3) and a_offset(7), for
// k in {1, 3, 5}; zero_index(mz) is set from a loop that only yields mz = 0.
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(7)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37033
// 1x4 scalar_fmagic kernel: full tile (m = 1, n = 4), k = 1, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
37046
// 1x4 scalar_fmagic kernel: full tile (m = 1, n = 4), k = 1, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
37059
// 1x4 scalar_fmagic kernel: full tile (m = 1, n = 4), k = 1, with
// cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_1X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(1)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
37072
37073
// 3x4 scalar_fmagic kernel: full tile (m = 3, n = 4) with k = 1 and no
// further tester options set.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
37085
// 3x4 scalar_fmagic kernel: full tile (m = 3, n = 4), k = 1, with
// cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
37098
// 3x4 scalar_fmagic kernel with k = 1: every (m, n) pair for n in 1..4 and
// m in 1..3, iterations(1) per pair.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37115
// 3x4 scalar_fmagic kernel with k = 1 and n = 4: m swept over 1..3,
// iterations(1) per value.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
37130
// 3x4 scalar_fmagic kernel with k = 1 and m = 3: n swept over 1..4,
// iterations(1) per value.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
37145
// 3x4 scalar_fmagic kernel: full tile (m = 3, n = 4) with k swept over 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
37159
// 3x4 scalar_fmagic kernel: k in 2..9 crossed with n in 1..4 and m in 1..3,
// iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37178
// 3x4 scalar_fmagic kernel: n in {5, 6, 7} (above nr = 4) crossed with
// k in {1, 3, 5}, m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37194
// 3x4 scalar_fmagic kernel with cn_stride(7): n in {5, 6, 7} crossed with
// k in {1, 3, 5}, m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37211
// 3x4 scalar_fmagic kernel: n in {5, 6, 7} crossed with k in {1, 3, 5} and
// m in 1..3, iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37230
// 3x4 scalar_fmagic kernel: n in {8, 12} (multiples of nr = 4) crossed with
// k in {1, 3, 5}, m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37246
// 3x4 scalar_fmagic kernel with cn_stride(7): n in {8, 12} crossed with
// k in {1, 3, 5}, m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37263
// 3x4 scalar_fmagic kernel: n in {8, 12} crossed with k in {1, 3, 5} and
// m in 1..3, iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37282
// 3x4 scalar_fmagic kernel: full tile (m = 3, n = 4) with ks(3), for
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
37297
// 3x4 scalar_fmagic kernel with ks(3): k in {1, 3, 5} crossed with n in 1..4
// and m in 1..3, iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37317
// 3x4 scalar_fmagic kernel with ks(3): n in {5, 6, 7} crossed with
// k in {1, 3, 5}, m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37334
// 3x4 scalar_fmagic kernel with ks(3): n in {8, 12} crossed with
// k in {1, 3, 5}, m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37351
// 3x4 scalar_fmagic kernel with cm_stride(7): k in {1, 3, 5} crossed with
// n in 1..4 and m in 1..3, iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37371
// 3x4 scalar_fmagic kernel: full tile (m = 3, n = 4) with ks(3) and
// a_offset(17), for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(17)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
37387
// 3x4 scalar_fmagic kernel: full tile with ks(3) and a_offset(17), for
// k in {1, 3, 5} crossed with zero_index(mz), mz in {0, 1, 2}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(17)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37406
// 3x4 scalar_fmagic kernel: full tile (m = 3, n = 4), k = 1, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
37419
// 3x4 scalar_fmagic kernel: full tile (m = 3, n = 4), k = 1, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
37432
// 3x4 scalar_fmagic kernel: full tile (m = 3, n = 4), k = 1, with
// cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic, xnn_init_qc8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
37445
37446
// 3x4 scalar_imagic kernel: full tile (m = 3, n = 4) with k = 1 and no
// further tester options set.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37458
// 3x4 scalar_imagic kernel: full tile (m = 3, n = 4), k = 1, with
// cn_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .cn_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37471
// 3x4 scalar_imagic kernel with k = 1: every (m, n) pair for n in 1..4 and
// m in 1..3, iterations(1) per pair.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37488
// 3x4 scalar_imagic kernel with k = 1 and n = 4: m swept over 1..3,
// iterations(1) per value.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37503
// 3x4 scalar_imagic kernel with k = 1 and m = 3: n swept over 1..4,
// iterations(1) per value.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37518
// 3x4 scalar_imagic kernel: full tile (m = 3, n = 4) with k swept over 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37532
// 3x4 scalar_imagic kernel: k in 2..9 crossed with n in 1..4 and m in 1..3,
// iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37551
// 3x4 scalar_imagic kernel: n in {5, 6, 7} (above nr = 4) crossed with
// k in {1, 3, 5}, m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37567
// 3x4 scalar_imagic kernel with cn_stride(7): n in {5, 6, 7} crossed with
// k in {1, 3, 5}, m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37584
// 3x4 scalar_imagic kernel: n in {5, 6, 7} crossed with k in {1, 3, 5} and
// m in 1..3, iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37603
// 3x4 scalar_imagic kernel: n in {8, 12} (multiples of nr = 4) crossed with
// k in {1, 3, 5}, m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37619
// 3x4 scalar_imagic kernel with cn_stride(7): n in {8, 12} crossed with
// k in {1, 3, 5}, m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37636
// 3x4 scalar_imagic kernel: n in {8, 12} crossed with k in {1, 3, 5} and
// m in 1..3, iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37655
// 3x4 scalar_imagic kernel: full tile (m = 3, n = 4) with ks(3), for
// k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37670
// 3x4 scalar_imagic kernel with ks(3): k in {1, 3, 5} crossed with n in 1..4
// and m in 1..3, iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37690
// 3x4 scalar_imagic kernel with ks(3): n in {5, 6, 7} crossed with
// k in {1, 3, 5}, m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37707
// 3x4 scalar_imagic kernel with ks(3): n in {8, 12} crossed with
// k in {1, 3, 5}, m fixed at 3.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37724
// 3x4 scalar_imagic kernel with cm_stride(7): k in {1, 3, 5} crossed with
// n in 1..4 and m in 1..3, iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37744
// 3x4 scalar_imagic kernel: full tile (m = 3, n = 4) with ks(3) and
// a_offset(17), for k in {1, 3, 5}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(17)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37760
// 3x4 scalar_imagic kernel: full tile with ks(3) and a_offset(17), for
// k in {1, 3, 5} crossed with zero_index(mz), mz in {0, 1, 2}.
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(17)
        .zero_index(mz)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37779
// 3x4 scalar_imagic kernel: full tile (m = 3, n = 4), k = 1, with qmin(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .qmin(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37792
// 3x4 scalar_imagic kernel: full tile (m = 3, n = 4), k = 1, with qmax(128).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .qmax(128)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37805
// 3x4 scalar_imagic kernel: full tile (m = 3, n = 4), k = 1, with
// cm_stride(7).
TEST(QC8_IGEMM_MINMAX_FP32_3X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(3)
    .n(4)
    .k(1)
    .cm_stride(7)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37818
37819
// 4x2 scalar_imagic kernel: full tile (m = 4, n = 2) with k = 1 and no
// further tester options set.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37831
// 4x2 scalar_imagic kernel: full tile (m = 4, n = 2), k = 1, with
// cn_stride(5).
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .cn_stride(5)
    .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
}
37844
// 4x2 scalar_imagic kernel with k = 1: every (m, n) pair for n in 1..2 and
// m in 1..4, iterations(1) per pair.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 2; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37861
// 4x2 scalar_imagic kernel with k = 1 and n = 2: m swept over 1..4,
// iterations(1) per value.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(m)
      .n(2)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37876
// 4x2 scalar_imagic kernel with k = 1 and m = 4: n swept over 1..2,
// iterations(1) per value.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 2; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37891
// 4x2 scalar_imagic kernel: full tile (m = 4, n = 2) with k swept over 2..9.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(k)
      .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
  }
}
37905
// 4x2 scalar_imagic kernel: k in 2..9 crossed with n in 1..2 and m in 1..4,
// iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 2; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37924
// 4x2 scalar_imagic kernel: the n loop yields only n = 3 (above nr = 2),
// crossed with k in {1, 3, 5}, m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37940
// 4x2 scalar_imagic kernel with cn_stride(5): the n loop yields only n = 3,
// crossed with k in {1, 3, 5}, m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37957
// 4x2 scalar_imagic kernel: the n loop yields only n = 3, crossed with
// k in {1, 3, 5} and m in 1..4, iterations(1) per configuration.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
37976
// 4x2 scalar_imagic kernel: n in {4, 6} (multiples of nr = 2) crossed with
// k in {1, 3, 5}, m fixed at 4.
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic, xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
37992
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  // n in {4, 6} (multiples of nr = 2), k in {1, 3, 5}, cn_stride set to 5.
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(cols)
        .k(depth)
        .cn_stride(5)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38009
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_subtile) {
  // n in {4, 6}, k in {1, 3, 5}, and every row count m from 1 through 4.
  // Each configuration is run with iterations(1).
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(rows)
          .n(cols)
          .k(depth)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38028
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, small_kernel) {
  // Full 4x2 tile with ks set to 3; k takes the values 1, 3 and 5.
  // NOTE(review): ks is presumably the indirection kernel size - confirm
  // against GemmMicrokernelTester.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(depth)
      .ks(3)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
38043
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, small_kernel_subtile) {
  // ks fixed at 3; k in {1, 3, 5}, n from 1 through 2, m from 1 through 4.
  // Each configuration is run with iterations(1).
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(rows)
          .n(cols)
          .k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38063
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  // n = 3 (one past nr = 2), k in {1, 3, 5}, with ks set to 3.
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(cols)
        .k(depth)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38080
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  // n in {4, 6} (multiples of nr = 2), k in {1, 3, 5}, with ks set to 3.
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(cols)
        .k(depth)
        .ks(3)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38097
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm_subtile) {
  // cm_stride fixed at 5; k in {1, 3, 5}, n from 1 through 2, m from 1
  // through 4. Each configuration is run with iterations(1).
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(2)
          .kr(1)
          .sr(1)
          .m(rows)
          .n(cols)
          .k(depth)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
            xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38117
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, a_offset) {
  // Full 4x2 tile with ks = 3 and a_offset = 23; k takes the values 1, 3, 5.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(2)
      .kr(1)
      .sr(1)
      .m(4)
      .n(2)
      .k(depth)
      .ks(3)
      .a_offset(23)
      .Test(
        xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
        xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
38133
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, zero) {
  // Full 4x2 tile with ks = 3 and a_offset = 23; for each k in {1, 3, 5}
  // the zero_index is swept over 0 through 3 (one value per row of mr = 4).
  // NOTE(review): zero_index presumably picks the row whose input is
  // replaced by the zero buffer - confirm against GemmMicrokernelTester.
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(2)
        .k(depth)
        .ks(3)
        .a_offset(23)
        .zero_index(zero_idx)
        .Test(
          xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
          xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38152
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmin) {
  // Single full 4x2 tile with k = 1, run with qmin set to 128; every knob
  // not set here keeps whatever GemmMicrokernelTester defaults to.
  constexpr int kQmin = 128;
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .qmin(kQmin)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
38165
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, qmax) {
  // Single full 4x2 tile with k = 1, run with qmax set to 128; every knob
  // not set here keeps whatever GemmMicrokernelTester defaults to.
  constexpr int kQmax = 128;
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .qmax(kQmax)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
38178
TEST(QC8_IGEMM_MINMAX_FP32_4X2__SCALAR_IMAGIC, strided_cm) {
  // Single full 4x2 tile with k = 1, run with cm_stride set to 5 (larger
  // than the tile width n = 2).
  constexpr int kCmStride = 5;
  GemmMicrokernelTester()
    .mr(4)
    .nr(2)
    .kr(1)
    .sr(1)
    .m(4)
    .n(2)
    .k(1)
    .cm_stride(kCmStride)
    .Test(
      xnn_qc8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
      xnn_init_qc8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
38191