1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/qs8-igemm-minmax-fp32.yaml
11 // Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_ARM
// Single run of the 1x1c4 armsimd32 microkernel with m=1, n=1, k=4.
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
40
// Single run with m=1, n=1, k=4 and cn_stride set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .cn_stride(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
54
// Runs k=4 for each (n, m) pair with n in [1, 1] and m in [1, 1], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 1; cur_n <= 1; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(4)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
72
// Runs n=1, k=4 for each m in [1, 1], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(cur_m).n(1).k(4)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
88
// Runs m=1, k=4 for each n in [1, 1], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 1; cur_n <= 1; ++cur_n) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(cur_n).k(4)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
104
// Runs m=1, n=1 for each k in 1..3.
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
119
// For each k in 1..3, runs every (n, m) pair with n in [1, 1] and m in [1, 1], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 1; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
139
// Runs m=1, n=1 for each k in 5..7.
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
154
// For each k in 5..7, runs every (n, m) pair with n in [1, 1] and m in [1, 1], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 1; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
174
// Runs m=1, n=1 for k = 8, 12, ..., 40 (step 4).
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
189
// For k = 8, 12, ..., 40, runs every (n, m) pair with n in [1, 1] and m in [1, 1], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 1; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
209
// Iterates n over [2, 2) and k over 1, 6, 11, 16 with m=1.
// NOTE(review): as written the n range is empty (starts at 2, requires n < 2),
// so the tester is never invoked by this test.
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 2; cur_n < 2; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
226
// Iterates n over [2, 2) and k over 1, 6, 11, 16 with m=1 and cn_stride set to 3.
// NOTE(review): as written the n range is empty (starts at 2, requires n < 2),
// so the tester is never invoked by this test.
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 2; cur_n < 2; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
244
// Iterates n over [2, 2), k over 1, 6, 11, 16 and m over [1, 1], using iterations(1).
// NOTE(review): as written the n range is empty (starts at 2, requires n < 2),
// so the tester is never invoked by this test.
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 2; cur_n < 2; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
264
// Runs m=1 for each n in 2..3 and each k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 2; cur_n <= 3; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
281
// Runs m=1 with cn_stride set to 3 for each n in 2..3 and each k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 2; cur_n <= 3; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
299
// For each n in 2..3 and k in 1, 6, 11, 16, runs every m in [1, 1], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 2; cur_n <= 3; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
319
// Runs m=1, n=1 with ks set to 3 for each k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(cur_k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
335
// For each k in 1, 6, 11, 16, runs every (n, m) pair with n in [1, 1] and m in [1, 1],
// with ks set to 3 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 1; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
356
// Iterates n over [2, 2) and k over 1, 6, 11, 16 with m=1 and ks set to 3.
// NOTE(review): as written the n range is empty (starts at 2, requires n < 2),
// so the tester is never invoked by this test.
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_gt_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 2; cur_n < 2; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
374
// Runs m=1 with ks set to 3 for each n in 2..3 and each k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, n_div_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 2; cur_n <= 3; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
392
// For each k in 1, 6, 11, 16, runs every (n, m) pair with n in [1, 1] and m in [1, 1],
// with cm_stride set to 3 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 1; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(1).kr(4).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
413
// Runs m=1, n=1 with ks set to 3 and a_offset set to 23 for each k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(1).nr(1).kr(4).sr(1)
        .m(1).n(1).k(cur_k)
        .ks(3)
        .a_offset(23)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
430
// For each k in 1, 6, 11, 16 and each zero_index in [0, 1), runs m=1, n=1
// with ks set to 3 and a_offset set to 23.
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(1).kr(4).sr(1)
          .m(1).n(1).k(cur_k)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
450
// Single run with m=1, n=1, k=4 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
464
// Single run with m=1, n=1, k=4 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
478
// Single run with m=1, n=1, k=4 and cm_stride set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X1C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(1).nr(1).kr(4).sr(1)
      .m(1).n(1).k(4)
      .cm_stride(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x1c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
492 #endif // XNN_ARCH_ARM
493
494
495 #if XNN_ARCH_ARM
// Single run of the 2x1c4 armsimd32 microkernel with m=2, n=1, k=4.
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
508
// Single run with m=2, n=1, k=4 and cn_stride set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .cn_stride(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
522
// Runs k=4 for each (n, m) pair with n in [1, 1] and m in 1..2, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 1; cur_n <= 1; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(4)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
540
// Runs n=1, k=4 for each m in 1..2, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile_m) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(cur_m).n(1).k(4)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
556
// Runs m=2, k=4 for each n in [1, 1], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_eq_4_subtile_n) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 1; cur_n <= 1; ++cur_n) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(cur_n).k(4)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
572
// Runs m=2, n=1 for each k in 1..3.
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_lt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
587
// For each k in 1..3, runs every (n, m) pair with n in [1, 1] and m in 1..2, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 1; cur_k < 4; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 1; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
607
// Runs m=2, n=1 for each k in 5..7.
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_gt_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
622
// For each k in 5..7, runs every (n, m) pair with n in [1, 1] and m in 1..2, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 5; cur_k < 8; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 1; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
642
// Runs m=2, n=1 for k = 8, 12, ..., 40 (step 4).
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_div_4) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
657
// For k = 8, 12, ..., 40, runs every (n, m) pair with n in [1, 1] and m in 1..2, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, k_div_4_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 8; cur_k <= 40; cur_k += 4) {
    for (uint32_t cur_n = 1; cur_n <= 1; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
677
// Iterates n over [2, 2) and k over 1, 6, 11, 16 with m=2.
// NOTE(review): as written the n range is empty (starts at 2, requires n < 2),
// so the tester is never invoked by this test.
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 2; cur_n < 2; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
694
// Iterates n over [2, 2) and k over 1, 6, 11, 16 with m=2 and cn_stride set to 3.
// NOTE(review): as written the n range is empty (starts at 2, requires n < 2),
// so the tester is never invoked by this test.
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 2; cur_n < 2; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .cn_stride(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
712
// Iterates n over [2, 2), k over 1, 6, 11, 16 and m over 1..2, using iterations(1).
// NOTE(review): as written the n range is empty (starts at 2, requires n < 2),
// so the tester is never invoked by this test.
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 2; cur_n < 2; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
732
// Runs m=2 for each n in 2..3 and each k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 2; cur_n <= 3; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
749
// Runs m=2 with cn_stride set to 3 for each n in 2..3 and each k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_strided_cn) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 2; cur_n <= 3; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .cn_stride(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
767
// For each n in 2..3 and k in 1, 6, 11, 16, runs every m in 1..2, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 2; cur_n <= 3; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
787
// Runs m=2, n=1 with ks set to 3 for each k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(cur_k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
803
// For each k in 1, 6, 11, 16, runs every (n, m) pair with n in [1, 1] and m in 1..2,
// with ks set to 3 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, small_kernel_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 1; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
824
// Iterates n over [2, 2) and k over 1, 6, 11, 16 with m=2 and ks set to 3.
// NOTE(review): as written the n range is empty (starts at 2, requires n < 2),
// so the tester is never invoked by this test.
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_gt_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 2; cur_n < 2; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
842
// Runs m=2 with ks set to 3 for each n in 2..3 and each k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, n_div_1_small_kernel) {
  TEST_REQUIRES_ARM_SIMD32;
  for (uint32_t cur_n = 2; cur_n <= 3; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(cur_n).k(cur_k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
860
// For each k in 1, 6, 11, 16, runs every (n, m) pair with n in [1, 1] and m in 1..2,
// with cm_stride set to 3 and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cm_subtile) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t cur_n = 1; cur_n <= 1; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(1).kr(4).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                  xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
881
// Runs m=2, n=1 with ks set to 3 and a_offset set to 43 for each k in 1, 6, 11, 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, a_offset) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    GemmMicrokernelTester()
        .mr(2).nr(1).kr(4).sr(1)
        .m(2).n(1).k(cur_k)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
              xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
              xnn_qs8_requantize_fp32);
  }
}
898
// For each k in 1, 6, 11, 16 and each zero_index in 0..1, runs m=2, n=1
// with ks set to 3 and a_offset set to 43.
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, zero) {
  TEST_REQUIRES_ARM_SIMD32;
  for (size_t cur_k = 1; cur_k <= 20; cur_k += 5) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(1).kr(4).sr(1)
          .m(2).n(1).k(cur_k)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
                xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
918
// Single run with m=2, n=1, k=4 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, qmin) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
932
// Single run with m=2, n=1, k=4 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, qmax) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
946
// Single run with m=2, n=1, k=4 and cm_stride set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X1C4__ARMSIMD32, strided_cm) {
  TEST_REQUIRES_ARM_SIMD32;
  GemmMicrokernelTester()
      .mr(2).nr(1).kr(4).sr(1)
      .m(2).n(1).k(4)
      .cm_stride(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x1c4__armsimd32,
            xnn_init_qs8_conv_minmax_fp32_armsimd32_params,
            xnn_qs8_requantize_fp32);
}
960 #endif // XNN_ARCH_ARM
961
962
963 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the 1x8c2 neon_mlal_dup microkernel with m=1, n=8, k=16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
976
// Single run with m=1, n=8, k=16 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
990
// Runs k=16 for each (n, m) pair with n in 1..8 and m in [1, 1], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(cur_m).n(cur_n).k(16)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
1008
// Runs n=8, k=16 for each m in [1, 1], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(cur_m).n(8).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1024
// Runs m=1, k=16 for each n in 1..8, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(cur_n).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1040
// Runs m=1, n=8 for each k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1055
// For each k in 1..15, runs every (n, m) pair with n in 1..8 and m in [1, 1], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
1075
// Runs m=1, n=8 for each k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1090
// For each k in 17..31, runs every (n, m) pair with n in 1..8 and m in [1, 1], using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
1110
// Runs m=1, n=8 for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(cur_k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
1125
// Runs the dup kernel for k = 32..160 (step 16), n = 1..8 and m = 1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1145
// Runs the dup kernel at m = 1 for n = 9..15 and k = 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1162
// Runs the dup kernel at m = 1 for n = 9..15 and k = 1..80 (step 17) with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1180
// Runs the dup kernel for n = 9..15, k = 1..80 (step 17) and m = 1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1200
// Runs the dup kernel at m = 1 for n = 16 and 24, and k = 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1217
// Runs the dup kernel at m = 1 for n = 16 and 24, and k = 1..80 (step 17), with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1235
// Runs the dup kernel for n = 16 and 24, k = 1..80 (step 17) and m = 1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1255
// Runs the dup kernel at m = 1, n = 8 with ks(3) for k = 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1271
// Runs the dup kernel with ks(3) for k = 1..80 (step 17), n = 1..8 and m = 1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1292
// Runs the dup kernel at m = 1 with ks(3) for n = 9..15 and k = 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1310
// Runs the dup kernel at m = 1 with ks(3) for n = 16 and 24, and k = 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1328
// Runs the dup kernel with cm_stride(11) for k = 1..80 (step 17), n = 1..8 and m = 1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1349
// Runs the dup kernel at m = 1, n = 8 with ks(3) and a_offset(83) for k = 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1366
// Runs the dup kernel at m = 1, n = 8 with ks(3) and a_offset(83) for k = 1..80 (step 17),
// passing each index in [0, 1) to zero_index().
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(8).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zi)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1386
// Single run of the dup kernel at m = 1, n = 8, k = 16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1400
// Single run of the dup kernel at m = 1, n = 8, k = 16 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1414
// Single run of the dup kernel at m = 1, n = 8, k = 16 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1428 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1429
1430
1431 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the ld2r kernel at m = 1, n = 8, k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1444
// Single run of the ld2r kernel at m = 1, n = 8, k = 16 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1458
// Runs the ld2r kernel at k = 16 for n = 1..8 and m = 1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1476
// Runs the ld2r kernel at n = 8, k = 16 for m = 1 (the loop covers 1..1), one iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(mc).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1492
// Runs the ld2r kernel at m = 1, k = 16 for n = 1..8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(nc).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1508
// Runs the ld2r kernel at m = 1, n = 8 for every k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1523
// Runs the ld2r kernel for k = 1..15, n = 1..8 and m = 1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1543
// Runs the ld2r kernel at m = 1, n = 8 for every k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1558
// Runs the ld2r kernel for k = 17..31, n = 1..8 and m = 1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1578
// Runs the ld2r kernel at m = 1, n = 8 for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1593
// Runs the ld2r kernel for k = 32..160 (step 16), n = 1..8 and m = 1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1613
// Runs the ld2r kernel at m = 1 for n = 9..15 and k = 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1630
// Runs the ld2r kernel at m = 1 for n = 9..15 and k = 1..80 (step 17) with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1648
// Runs the ld2r kernel for n = 9..15, k = 1..80 (step 17) and m = 1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1668
// Runs the ld2r kernel at m = 1 for n = 16 and 24, and k = 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1685
// Runs the ld2r kernel at m = 1 for n = 16 and 24, and k = 1..80 (step 17), with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1703
// Runs the ld2r kernel for n = 16 and 24, k = 1..80 (step 17) and m = 1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1723
// Runs the ld2r kernel at m = 1, n = 8 with ks(3) for k = 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1739
// Runs the ld2r kernel with ks(3) for k = 1..80 (step 17), n = 1..8 and m = 1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1760
// Runs the ld2r kernel at m = 1 with ks(3) for n = 9..15 and k = 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1778
// Runs the ld2r kernel at m = 1 with ks(3) for n = 16 and 24, and k = 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1796
// Runs the ld2r kernel with cm_stride(11) for k = 1..80 (step 17), n = 1..8 and m = 1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
1817
// Runs the ld2r kernel at m = 1, n = 8 with ks(3) and a_offset(83) for k = 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1834
// Runs the ld2r kernel at m = 1, n = 8 with ks(3) and a_offset(83) for k = 1..80 (step 17),
// passing each index in [0, 1) to zero_index().
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(8).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zi)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1854
// Single run of the ld2r kernel at m = 1, n = 8, k = 16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1868
// Single run of the ld2r kernel at m = 1, n = 8, k = 16 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1882
// Single run of the ld2r kernel at m = 1, n = 8, k = 16 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1896 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1897
1898
1899 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the ld4r kernel at m = 1, n = 8, k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1912
// Single run of the ld4r kernel at m = 1, n = 8, k = 16 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
1926
// Runs the ld4r kernel at k = 16 for n = 1..8 and m = 1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
1944
// Runs the ld4r kernel at n = 8, k = 16 for m = 1 (the loop covers 1..1), one iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(mc).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1960
// Runs the ld4r kernel at m = 1, k = 16 for n = 1..8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(nc).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1976
// Runs the ld4r kernel at m = 1, n = 8 for every k in 1..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
1991
// Runs the ld4r kernel for k = 1..15, n = 1..8 and m = 1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2011
// Runs the ld4r kernel at m = 1, n = 8 for every k in 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
2026
// Runs the ld4r kernel for k = 17..31, n = 1..8 and m = 1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2046
// Runs the ld4r kernel at m = 1, n = 8 for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
2061
// Runs the ld4r kernel for k = 32..160 (step 16), n = 1..8 and m = 1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2081
// Runs the ld4r kernel at m = 1 for n = 9..15 and k = 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2098
// Runs the ld4r kernel at m = 1 for n = 9..15 and k = 1..80 (step 17) with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2116
// Runs the ld4r kernel for n = 9..15, k = 1..80 (step 17) and m = 1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2136
// Runs the ld4r kernel at m = 1 for n = 16 and 24, and k = 1..80 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2153
// Runs the ld4r kernel at m = 1 for n = 16 and 24, and k = 1..80 (step 17), with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
2171
// Runs the ld4r kernel for n = 16 and 24, k = 1..80 (step 17) and m = 1, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
2191
// Sweeps k over {1, 18, 35, 52, 69} with m = 1, n = 8 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2207
// Sweeps k over {1, 18, 35, 52, 69}, n over [1, 8] and m over [1, 1] with
// ks set to 3, running a single tester iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2228
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m fixed at 1
// and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2246
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m fixed at 1
// and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2264
// Sweeps k over {1, 18, 35, 52, 69}, n over [1, 8] and m over [1, 1] with
// cm_stride set to 11, running a single tester iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2285
// Sweeps k over {1, 18, 35, 52, 69} with m = 1, n = 8, ks set to 3 and
// a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
2302
// Sweeps k over {1, 18, 35, 52, 69} and zero_index over [0, 0] with m = 1,
// n = 8, ks set to 3 and a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_val)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2322
// Single case: m = 1, n = 8, k = 16 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
2336
// Single case: m = 1, n = 8, k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
2350
// Single case: m = 1, n = 8, k = 16 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEON_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld4r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
2364 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2365
2366
2367 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: m = 1, n = 8, k = 16 with no further tester options set.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2380
// Single case: m = 1, n = 8, k = 16 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2394
// Sweeps n over [1, 8] and m over [1, 1] at k = 16, running a single tester
// iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_val).n(n_val).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2412
// Sweeps m over [1, 1] with n = 8 and k = 16, one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(m_val).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2428
// Sweeps n over [1, 8] with m = 1 and k = 16, one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(n_val).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2444
// Sweeps k over [1, 15] with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2459
// Sweeps k over [1, 15], n over [1, 8] and m over [1, 1], running a single
// tester iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2479
// Sweeps k over [17, 31] with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2494
// Sweeps k over [17, 31], n over [1, 8] and m over [1, 1], running a single
// tester iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2514
// Sweeps k over {32, 48, ..., 160} (step 16) with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2529
// Sweeps k over {32, 48, ..., 160} (step 16), n over [1, 8] and m over [1, 1],
// running a single tester iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2549
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2566
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m fixed at 1
// and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2584
// Sweeps n over [9, 15], k over {1, 18, 35, 52, 69} and m over [1, 1],
// running a single tester iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2604
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2621
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m fixed at 1
// and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2639
// Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over [1, 1],
// running a single tester iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2659
// Sweeps k over {1, 18, 35, 52, 69} with m = 1, n = 8 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2675
// Sweeps k over {1, 18, 35, 52, 69}, n over [1, 8] and m over [1, 1] with
// ks set to 3, running a single tester iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2696
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m fixed at 1
// and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2714
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m fixed at 1
// and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2732
// Sweeps k over {1, 18, 35, 52, 69}, n over [1, 8] and m over [1, 1] with
// cm_stride set to 11, running a single tester iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2753
// Sweeps k over {1, 18, 35, 52, 69} with m = 1, n = 8, ks set to 3 and
// a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2770
// Sweeps k over {1, 18, 35, 52, 69} and zero_index over [0, 0] with m = 1,
// n = 8, ks set to 3 and a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_val)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2790
// Single case: m = 1, n = 8, k = 16 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2804
// Single case: m = 1, n = 8, k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2818
// Single case: m = 1, n = 8, k = 16 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2832 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2833
2834
2835 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: m = 1, n = 8, k = 16 with no further tester options set.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2848
// Single case: m = 1, n = 8, k = 16 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
2862
// Sweeps n over [1, 8] and m over [1, 1] at k = 16, running a single tester
// iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(m_val).n(n_val).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
2880
// Sweeps m over [1, 1] with n = 8 and k = 16, one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(m_val).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2896
// Sweeps n over [1, 8] with m = 1 and k = 16, one tester iteration per case.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(n_val).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2912
// Sweeps k over [1, 15] with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2927
// Sweeps k over [1, 15], n over [1, 8] and m over [1, 1], running a single
// tester iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2947
// Sweeps k over [17, 31] with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2962
// Sweeps k over [17, 31], n over [1, 8] and m over [1, 1], running a single
// tester iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
2982
// Sweeps k over {32, 48, ..., 160} (step 16) with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
2997
// Sweeps k over {32, 48, ..., 160} (step 16), n over [1, 8] and m over [1, 1],
// running a single tester iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3017
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3034
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m fixed at 1
// and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3052
// Sweeps n over [9, 15], k over {1, 18, 35, 52, 69} and m over [1, 1],
// running a single tester iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3072
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3089
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m fixed at 1
// and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3107
// Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over [1, 1],
// running a single tester iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3127
// Sweeps k over {1, 18, 35, 52, 69} with m = 1, n = 8 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3143
// Sweeps k over {1, 18, 35, 52, 69}, n over [1, 8] and m over [1, 1] with
// ks set to 3, running a single tester iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3164
// Sweeps n over [9, 15] and k over {1, 18, 35, 52, 69} with m fixed at 1
// and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3182
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m fixed at 1
// and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
3200
// Sweeps k over {1, 18, 35, 52, 69}, n over [1, 8] and m over [1, 1] with
// cm_stride set to 11, running a single tester iteration for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
3221
// Sweeps k over {1, 18, 35, 52, 69} with m = 1, n = 8, ks set to 3 and
// a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(1)
      .m(1).n(8).k(k_val)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
3238
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in {1, 18, 35, 52, 69}; the zero_index loop covers only index 0.
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_dim)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3258
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One run at m = 1, n = 8, k = 16 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
3272
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One run at m = 1, n = 8, k = 16 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
3286
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2__NEONV8_MLAL_LD4R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One run at m = 1, n = 8, k = 16 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld4r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
3300 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3301
3302
3303 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // One run at m = 1, n = 8, k = 16.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
3316
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // One run at m = 1, n = 8, k = 16 with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
3330
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n = 1..8 crossed with m = 1 at k = 16; one iteration per shape.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(m_dim).n(n_dim).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3348
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // The m loop covers only m = 1; n = 8, k = 16, one iteration.
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(m_dim).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
3364
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // n = 1..8 at m = 1, k = 16; one iteration per n.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(n_dim).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
3380
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // k = 1..15 at m = 1, n = 8.
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_dim)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
3395
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 1..15 x n = 1..8 x m = 1; one iteration per shape.
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3415
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // k = 17..31 at m = 1, n = 8.
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_dim)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
3430
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 17..31 x n = 1..8 x m = 1; one iteration per shape.
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3450
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // k = 32, 48, ..., 160 (multiples of 16) at m = 1, n = 8.
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_dim)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
3465
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 32, 48, ..., 160 x n = 1..8 x m = 1; one iteration per shape.
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3485
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // n = 9..15 (above nr = 8) crossed with k in {1, 18, 35, 52, 69}.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3502
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n = 9..15 crossed with k in {1, 18, 35, 52, 69}, cn_stride = 11.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3520
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n = 9..15 x k in {1, 18, 35, 52, 69} x m = 1; one iteration per shape.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3540
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} (multiples of nr = 8) crossed with k in {1, 18, 35, 52, 69}.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3557
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} crossed with k in {1, 18, 35, 52, 69}, cn_stride = 11.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3575
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} x k in {1, 18, 35, 52, 69} x m = 1; one iteration per shape.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3595
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // k in {1, 18, 35, 52, 69} at m = 1, n = 8 with ks = 3.
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
3611
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k in {1, 18, 35, 52, 69} x n = 1..8 x m = 1, ks = 3; one iteration per shape.
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3632
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n = 9..15 (above nr = 8) crossed with k in {1, 18, 35, 52, 69}, ks = 3.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3650
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  // n in {16, 24} (multiples of nr = 8) crossed with k in {1, 18, 35, 52, 69}, ks = 3.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3668
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k in {1, 18, 35, 52, 69} x n = 1..8 x m = 1, cm_stride = 11, one iteration per shape.
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3689
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // k in {1, 18, 35, 52, 69} at m = 1, n = 8 with ks = 3 and a_offset = 83.
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
3706
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  // k in {1, 18, 35, 52, 69}; the zero_index loop covers only index 0.
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(8).k(k_dim)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3726
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // One run at m = 1, n = 8, k = 16 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
3740
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // One run at m = 1, n = 8, k = 16 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
3754
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  // One run at m = 1, n = 8, k = 16 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
3768 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3769
3770
3771 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One run at m = 1, n = 8, k = 16.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
3784
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One run at m = 1, n = 8, k = 16 with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
3798
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 1..8 crossed with m = 1 at k = 16; one iteration per shape.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(m_dim).n(n_dim).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3816
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  // The m loop covers only m = 1; n = 8, k = 16, one iteration.
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(m_dim).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3832
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 1..8 at m = 1, k = 16; one iteration per n.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(n_dim).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3848
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 1..15 at m = 1, n = 8.
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_dim)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3863
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 1..15 x n = 1..8 x m = 1; one iteration per shape.
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3883
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 17..31 at m = 1, n = 8.
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_dim)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3898
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 17..31 x n = 1..8 x m = 1; one iteration per shape.
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3918
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 32, 48, ..., 160 (multiples of 16) at m = 1, n = 8.
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_dim)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
3933
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k = 32, 48, ..., 160 x n = 1..8 x m = 1; one iteration per shape.
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
3953
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 9..15 (above nr = 8) crossed with k in {1, 18, 35, 52, 69}.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3970
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 9..15 crossed with k in {1, 18, 35, 52, 69}, cn_stride = 11.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
3988
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 9..15 x k in {1, 18, 35, 52, 69} x m = 1; one iteration per shape.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4008
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in {16, 24} (multiples of nr = 8) crossed with k in {1, 18, 35, 52, 69}.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4025
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in {16, 24} crossed with k in {1, 18, 35, 52, 69}, cn_stride = 11.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4043
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in {16, 24} x k in {1, 18, 35, 52, 69} x m = 1; one iteration per shape.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4063
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in {1, 18, 35, 52, 69} at m = 1, n = 8 with ks = 3.
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4079
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in {1, 18, 35, 52, 69} x n = 1..8 x m = 1, ks = 3; one iteration per shape.
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4100
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n = 9..15 (above nr = 8) crossed with k in {1, 18, 35, 52, 69}, ks = 3.
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4118
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  // n in {16, 24} (multiples of nr = 8) crossed with k in {1, 18, 35, 52, 69}, ks = 3.
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(n_dim).k(k_dim)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4136
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in {1, 18, 35, 52, 69} x n = 1..8 x m = 1, cm_stride = 11, one iteration per shape.
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
4157
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in {1, 18, 35, 52, 69} at m = 1, n = 8 with ks = 3 and a_offset = 83.
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(2).sr(4)
      .m(1).n(8).k(k_dim)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
4174
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  // k in {1, 18, 35, 52, 69}; the zero_index loop covers only index 0.
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(4)
        .m(1).n(8).k(k_dim)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4194
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One run at m = 1, n = 8, k = 16 with qmin set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
4208
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One run at m = 1, n = 8, k = 16 with qmax set to 128.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
4222
TEST(QS8_IGEMM_MINMAX_FP32_1X8C2S4__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  // One run at m = 1, n = 8, k = 16 with cm_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(2).sr(4)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2s4__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
4236 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4237
4238
4239 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  // One run at m = 1, n = 8, k = 16.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
4252
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // One run at m = 1, n = 8, k = 16 with cn_stride set to 11.
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
4266
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n = 1..8 crossed with m = 1 at k = 16; one iteration per shape.
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m_dim).n(n_dim).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
4284
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // The m loop covers only m = 1; n = 8, k = 16, one iteration.
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(m_dim).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
4300
// m=1 and k=16 are fixed; n sweeps 1..8, each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(cur_n).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4316
// m=1 and n=8 are fixed; k sweeps every value from 1 through 15.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(cur_k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4331
// k sweeps 1..15, n sweeps 1..8, and the m loop visits only m=1.
// Each shape is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4351
// m=1 and n=8 are fixed; k sweeps every value from 17 through 31.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(cur_k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4366
// k sweeps 17..31, n sweeps 1..8, and the m loop visits only m=1.
// Each shape is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4386
// m=1 and n=8 are fixed; k steps through 32, 48, ..., 160 (increments of 16).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(cur_k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4401
// k steps through 32, 48, ..., 160; n sweeps 1..8; the m loop visits only m=1.
// Each shape is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4421
// m=1 is fixed; n sweeps 9..15 and, for each n, k takes 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4438
// m=1 is fixed; n sweeps 9..15 and k takes 1, 18, 35, 52, 69, all with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4456
// n sweeps 9..15, k takes 1, 18, 35, 52, 69, and the m loop visits only m=1.
// Each shape is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4476
// m=1 is fixed; n takes 16 and 24 and, for each n, k takes 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4493
// m=1 is fixed; n takes 16 and 24 and k takes 1, 18, 35, 52, 69, all with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4511
// n takes 16 and 24, k takes 1, 18, 35, 52, 69, and the m loop visits only m=1.
// Each shape is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4531
// m=1 and n=8 are fixed; k takes 1, 18, 35, 52, 69, each run with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(cur_k)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4547
// k takes 1, 18, 35, 52, 69; n sweeps 1..8; the m loop visits only m=1.
// Each shape is run with ks(3) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4568
// m=1 is fixed; n sweeps 9..15 and k takes 1, 18, 35, 52, 69, each run with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4586
// m=1 is fixed; n takes 16 and 24 and k takes 1, 18, 35, 52, 69, each run with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4604
// k takes 1, 18, 35, 52, 69; n sweeps 1..8; the m loop visits only m=1.
// Each shape is run with cm_stride(11) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4625
// m=1 and n=8 are fixed; k takes 1, 18, 35, 52, 69, each run with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(cur_k)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4642
// m=1 and n=8 are fixed; k takes 1, 18, 35, 52, 69 with ks(3) and a_offset(83).
// The zero_index loop visits only index 0.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(cur_k)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4662
// One tester run at m=1, n=8, k=16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4676
// One tester run at m=1, n=8, k=16 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4690
// One tester run at m=1, n=8, k=16 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4704 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4705
4706
4707 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One tester run at m=1, n=8, k=16; no optional tester settings are overridden.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4720
// One tester run at m=1, n=8, k=16 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
4734
// k is fixed at 16; n sweeps 1..8 and the m loop visits only m=1.
// Each shape is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(cur_m).n(cur_n).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4752
// n=8 and k=16 are fixed; the m loop visits only m=1, run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(cur_m).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4768
// m=1 and k=16 are fixed; n sweeps 1..8, each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(cur_n).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4784
// m=1 and n=8 are fixed; k sweeps every value from 1 through 15.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(cur_k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4799
// k sweeps 1..15, n sweeps 1..8, and the m loop visits only m=1.
// Each shape is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4819
// m=1 and n=8 are fixed; k sweeps every value from 17 through 31.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(cur_k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4834
// k sweeps 17..31, n sweeps 1..8, and the m loop visits only m=1.
// Each shape is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4854
// m=1 and n=8 are fixed; k steps through 32, 48, ..., 160 (increments of 16).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(cur_k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
4869
// k steps through 32, 48, ..., 160; n sweeps 1..8; the m loop visits only m=1.
// Each shape is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4889
// m=1 is fixed; n sweeps 9..15 and, for each n, k takes 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4906
// m=1 is fixed; n sweeps 9..15 and k takes 1, 18, 35, 52, 69, all with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4924
// n sweeps 9..15, k takes 1, 18, 35, 52, 69, and the m loop visits only m=1.
// Each shape is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4944
// m=1 is fixed; n takes 16 and 24 and, for each n, k takes 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4961
// m=1 is fixed; n takes 16 and 24 and k takes 1, 18, 35, 52, 69, all with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
4979
// n takes 16 and 24, k takes 1, 18, 35, 52, 69, and the m loop visits only m=1.
// Each shape is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
4999
// m=1 and n=8 are fixed; k takes 1, 18, 35, 52, 69, each run with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(cur_k)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5015
// k takes 1, 18, 35, 52, 69; n sweeps 1..8; the m loop visits only m=1.
// Each shape is run with ks(3) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5036
// m=1 is fixed; n sweeps 9..15 and k takes 1, 18, 35, 52, 69, each run with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n < 16; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5054
// m=1 is fixed; n takes 16 and 24 and k takes 1, 18, 35, 52, 69, each run with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(cur_n).k(cur_k)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5072
// k takes 1, 18, 35, 52, 69; n sweeps 1..8; the m loop visits only m=1.
// Each shape is run with cm_stride(11) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5093
// m=1 and n=8 are fixed; k takes 1, 18, 35, 52, 69, each run with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(cur_k)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
5110
// m=1 and n=8 are fixed; k takes 1, 18, 35, 52, 69 with ks(3) and a_offset(83).
// The zero_index loop visits only index 0.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(cur_k)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5130
// One tester run at m=1, n=8, k=16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5144
// One tester run at m=1, n=8, k=16 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5158
// One tester run at m=1, n=8, k=16 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEON_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neon_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
5172 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5173
5174
5175 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// One tester run at m=1, n=8, k=16; no optional tester settings are overridden.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5188
// One tester run at m=1, n=8, k=16 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
5202
// k is fixed at 16; n sweeps 1..8 and the m loop visits only m=1.
// Each shape is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(cur_m).n(cur_n).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
5220
// n=8 and k=16 are fixed; the m loop visits only m=1, run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(cur_m).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5236
// m=1 and k=16 are fixed; n sweeps 1..8, each run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(cur_n).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5252
// m=1 and n=8 are fixed; k sweeps every value from 1 through 15.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(cur_k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5267
// k sweeps 1..15, n sweeps 1..8, and the m loop visits only m=1.
// Each shape is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 1; cur_k < 16; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5287
// m=1 and n=8 are fixed; k sweeps every value from 17 through 31.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(cur_k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5302
// k sweeps 17..31, n sweeps 1..8, and the m loop visits only m=1.
// Each shape is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 17; cur_k < 32; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5322
// m=1 and n=8 are fixed; k steps through 32, 48, ..., 160 (increments of 16).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(cur_k)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
5337
// k steps through 32, 48, ..., 160; n sweeps 1..8; the m loop visits only m=1.
// Each shape is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(cur_m).n(cur_n).k(cur_k)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
5357
// n_gt_8: for each n in 9..15, sweeps k over 1..80 (step 17) with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5374
// n_gt_8_strided_cn: for each n in 9..15, sweeps k over 1..80 (step 17)
// with m = 1 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5392
// n_gt_8_subtile: for each n in 9..15 and k in 1..80 (step 17), runs the
// single-pass m loop (m = 1); one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5412
// n_div_8: for n = 16 and n = 24, sweeps k over 1..80 (step 17) with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5429
// n_div_8_strided_cn: for n = 16 and n = 24, sweeps k over 1..80 (step 17)
// with m = 1 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5447
// n_div_8_subtile: for n = 16 and n = 24 and each k in 1..80 (step 17), runs
// the single-pass m loop (m = 1); one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5467
// small_kernel: sweeps k over 1..80 (step 17) with m = 1, n = 8 and ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5483
// small_kernel_subtile: for each k in 1..80 (step 17), tries every n in 1..8
// (single-pass m loop, m = 1) with ks = 3; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5504
// n_gt_8_small_kernel: for each n in 9..15, sweeps k over 1..80 (step 17)
// with m = 1 and ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5522
// n_div_8_small_kernel: for n = 16 and n = 24, sweeps k over 1..80 (step 17)
// with m = 1 and ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5540
// strided_cm_subtile: for each k in 1..80 (step 17), tries every n in 1..8
// (single-pass m loop, m = 1) with cm_stride set to 11; one iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5561
// a_offset: sweeps k over 1..80 (step 17) with m = 1, n = 8, ks = 3 and
// a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5578
// zero: sweeps k over 1..80 (step 17) with m = 1, n = 8, ks = 3 and
// a_offset = 83, passing each zero_index in [0, 1) (i.e. only 0).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5598
// qmin: single run with m = 1, n = 8, k = 16 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5612
// qmax: single run with m = 1, n = 8, k = 16 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5626
// strided_cm: single run with m = 1, n = 8, k = 16 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5640 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5641
5642
5643 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_16: single run with m = 1, n = 8, k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5656
// strided_cn: single run with m = 1, n = 8, k = 16 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
5670
// k_eq_16_subtile: k = 16 for every n in 1..8 (the m loop makes a single
// pass with m = 1); one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5688
// k_eq_16_subtile_m: k = 16, n = 8, with the m loop making a single pass
// (m = 1); one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5704
// k_eq_16_subtile_n: k = 16, m = 1, for every n in 1..8; one iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(nc).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5720
// k_lt_16: sweeps k over 1..15 with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5735
// k_lt_16_subtile: for each k in 1..15, tries every n in 1..8 (the m loop
// makes a single pass with m = 1); one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5755
// k_gt_16: sweeps k over 17..31 with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5770
// k_gt_16_subtile: for each k in 17..31, tries every n in 1..8 (the m loop
// makes a single pass with m = 1); one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5790
// k_div_16: sweeps k over 32, 48, ..., 160 (step 16) with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5805
// k_div_16_subtile: for each k in 32..160 (step 16), tries every n in 1..8
// (the m loop makes a single pass with m = 1); one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5825
// n_gt_8: for each n in 9..15, sweeps k over 1..80 (step 17) with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5842
// n_gt_8_strided_cn: for each n in 9..15, sweeps k over 1..80 (step 17)
// with m = 1 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5860
// n_gt_8_subtile: for each n in 9..15 and k in 1..80 (step 17), runs the
// single-pass m loop (m = 1); one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5880
// n_div_8: for n = 16 and n = 24, sweeps k over 1..80 (step 17) with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5897
// n_div_8_strided_cn: for n = 16 and n = 24, sweeps k over 1..80 (step 17)
// with m = 1 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5915
// n_div_8_subtile: for n = 16 and n = 24 and each k in 1..80 (step 17), runs
// the single-pass m loop (m = 1); one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5935
// small_kernel: sweeps k over 1..80 (step 17) with m = 1, n = 8 and ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
5951
// small_kernel_subtile: for each k in 1..80 (step 17), tries every n in 1..8
// (single-pass m loop, m = 1) with ks = 3; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
5972
// n_gt_8_small_kernel: for each n in 9..15, sweeps k over 1..80 (step 17)
// with m = 1 and ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
5990
// n_div_8_small_kernel: for n = 16 and n = 24, sweeps k over 1..80 (step 17)
// with m = 1 and ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
6008
// strided_cm_subtile: for each k in 1..80 (step 17), tries every n in 1..8
// (single-pass m loop, m = 1) with cm_stride set to 11; one iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
6029
// a_offset: sweeps k over 1..80 (step 17) with m = 1, n = 8, ks = 3 and
// a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
6046
// zero: sweeps k over 1..80 (step 17) with m = 1, n = 8, ks = 3 and
// a_offset = 83, passing each zero_index in [0, 1) (i.e. only 0).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
6066
// qmin: single run with m = 1, n = 8, k = 16 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
6080
// qmax: single run with m = 1, n = 8, k = 16 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
6094
// strided_cm: single run with m = 1, n = 8, k = 16 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONV8_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(1)
    .m(1).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
6108 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6109
6110
6111 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// k_eq_16: single run with m = 1, n = 8, k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(2)
    .m(1).n(8).k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
6124
// strided_cn: single run with m = 1, n = 8, k = 16 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(4).sr(2)
    .m(1).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
6138
// k_eq_16_subtile: k = 16 for every n in 1..8 (the m loop makes a single
// pass with m = 1); one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
6156
// k_eq_16_subtile_m: k = 16, n = 8, with the m loop making a single pass
// (m = 1); one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
6172
// k_eq_16_subtile_n: k = 16, m = 1, for every n in 1..8; one iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(nc).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
6188
// k_lt_16: sweeps k over 1..15 with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
6203
// k_lt_16_subtile: for each k in 1..15, tries every n in 1..8 (the m loop
// makes a single pass with m = 1); one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
6223
// k_gt_16: sweeps k over 17..31 with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
6238
// k_gt_16_subtile: for each k in 17..31, tries every n in 1..8 (the m loop
// makes a single pass with m = 1); one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
6258
// k_div_16: sweeps k over 32, 48, ..., 160 (step 16) with m = 1 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
6273
// k_div_16_subtile: for each k in 32..160 (step 16), tries every n in 1..8
// (the m loop makes a single pass with m = 1); one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
6293
// n_gt_8: for each n in 9..15, sweeps k over 1..80 (step 17) with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
6310
// n_gt_8_strided_cn: for each n in 9..15, sweeps k over 1..80 (step 17)
// with m = 1 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
6328
// n_gt_8_subtile: for each n in 9..15 and k in 1..80 (step 17), runs the
// single-pass m loop (m = 1); one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
6348
// n_div_8: for n = 16 and n = 24, sweeps k over 1..80 (step 17) with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
6365
// n_div_8_strided_cn: for n = 16 and n = 24, sweeps k over 1..80 (step 17)
// with m = 1 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
6383
// n_div_8_subtile: for n = 16 and n = 24 and each k in 1..80 (step 17), runs
// the single-pass m loop (m = 1); one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
6403
// small_kernel: sweeps k over 1..80 (step 17) with m = 1, n = 8 and ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
6419
// ks(3) with k in {1, 18, 35, 52, 69}, n in [1, 8] and the m loop covering
// only m = 1. Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(2)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6440
// ks(3) with n in [9, 15] and k in {1, 18, 35, 52, 69} (m = 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6458
// ks(3) with n in {16, 24} and k in {1, 18, 35, 52, 69} (m = 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6476
// cm_stride(11) with k in {1, 18, 35, 52, 69}, n in [1, 8] and the m loop
// covering only m = 1. Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(2)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6497
// ks(3) and a_offset(83) at m = 1, n = 8; k takes the values 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
6514
// ks(3) and a_offset(83) with k in {1, 18, 35, 52, 69}; the zero_index loop
// covers only index 0.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t z_idx = 0; z_idx < 1; ++z_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(8).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(z_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6534
// Single configuration (m = 1, n = 8, k = 16) with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
6548
// Single configuration (m = 1, n = 8, k = 16) with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
6562
// Single configuration (m = 1, n = 8, k = 16) with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
6576 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6577
6578
6579 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: m = 1, n = 8, k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(16)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
6592
// Single configuration (m = 1, n = 8, k = 16) with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
6606
// k fixed at 16; n sweeps [1, 8] and the m loop covers only m = 1.
// Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(m_size).n(n_size).k(16)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6624
// k = 16, n = 8; the m loop covers only m = 1. Uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(m_size).n(8).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
6640
// k = 16, m = 1; n sweeps [1, 8]. Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(n_size).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
6656
// k sweeps [1, 15] at m = 1, n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(8).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
6671
// k sweeps [1, 15], n sweeps [1, 8], and the m loop covers only m = 1.
// Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(2)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6691
// k sweeps [17, 31] at m = 1, n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(8).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
6706
// k sweeps [17, 31], n sweeps [1, 8], and the m loop covers only m = 1.
// Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(2)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6726
// k visits every multiple of 16 from 32 through 160 at m = 1, n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(8).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
6741
// k visits every multiple of 16 from 32 through 160, n sweeps [1, 8], and the
// m loop covers only m = 1. Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(2)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6761
// n sweeps [9, 15]; k takes the values 1, 18, 35, 52, 69 (m = 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6778
// n sweeps [9, 15]; k takes the values 1, 18, 35, 52, 69;
// cn_stride is set to 11 for every run.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6796
// n sweeps [9, 15]; k takes the values 1, 18, 35, 52, 69; the m loop covers
// only m = 1. Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(2)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6816
// n takes the values 16 and 24; k takes the values 1, 18, 35, 52, 69 (m = 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6833
// n takes the values 16 and 24; k takes the values 1, 18, 35, 52, 69;
// cn_stride is set to 11 for every run.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6851
// n takes the values 16 and 24; k takes the values 1, 18, 35, 52, 69;
// the m loop covers only m = 1. Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(2)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6871
// ks(3) at m = 1, n = 8; k takes the values 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(8).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
6887
// ks(3) with k in {1, 18, 35, 52, 69}, n in [1, 8] and the m loop covering
// only m = 1. Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(2)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6908
// ks(3) with n in [9, 15] and k in {1, 18, 35, 52, 69} (m = 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6926
// ks(3) with n in {16, 24} and k in {1, 18, 35, 52, 69} (m = 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
6944
// cm_stride(11) with k in {1, 18, 35, 52, 69}, n in [1, 8] and the m loop
// covering only m = 1. Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(2)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                  xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
6965
// ks(3) and a_offset(83) at m = 1, n = 8; k takes the values 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(2)
        .m(1).n(8).k(k_size)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
  }
}
6982
// ks(3) and a_offset(83) with k in {1, 18, 35, 52, 69}; the zero_index loop
// covers only index 0.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t z_idx = 0; z_idx < 1; ++z_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(2)
          .m(1).n(8).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(z_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7002
// Single configuration (m = 1, n = 8, k = 16) with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7016
// Single configuration (m = 1, n = 8, k = 16) with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7030
// Single configuration (m = 1, n = 8, k = 16) with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4S2__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(2)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
}
7044 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7045
7046
7047 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: m = 2, n = 8, k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7060
// Single configuration (m = 2, n = 8, k = 16) with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7074
// k fixed at 16; n sweeps [1, 8] and m sweeps [1, 2].
// Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(16)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7092
// k = 16, n = 8; m sweeps [1, 2]. Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(m_size).n(8).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7108
// k = 16, m = 2; n sweeps [1, 8]. Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_size).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7124
// k sweeps [1, 15] at m = 2, n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7139
// k sweeps [1, 15], n sweeps [1, 8], and m sweeps [1, 2].
// Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7159
// k sweeps [17, 31] at m = 2, n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7174
// k sweeps [17, 31], n sweeps [1, 8], and m sweeps [1, 2].
// Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7194
// k visits every multiple of 16 from 32 through 160 at m = 2, n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7209
// k visits every multiple of 16 from 32 through 160, n sweeps [1, 8], and
// m sweeps [1, 2]. Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7229
// n sweeps [9, 15]; k takes the values 1, 18, 35, 52, 69 (m = 2).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7246
// n sweeps [9, 15]; k takes the values 1, 18, 35, 52, 69;
// cn_stride is set to 11 for every run.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7264
// n sweeps [9, 15]; k takes the values 1, 18, 35, 52, 69; m sweeps [1, 2].
// Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7284
// n takes the values 16 and 24; k takes the values 1, 18, 35, 52, 69 (m = 2).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7301
// n takes the values 16 and 24; k takes the values 1, 18, 35, 52, 69;
// cn_stride is set to 11 for every run.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7319
// n takes the values 16 and 24; k takes the values 1, 18, 35, 52, 69;
// m sweeps [1, 2]. Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7339
// ks(3) at m = 2, n = 8; k takes the values 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7355
// ks(3) with k in {1, 18, 35, 52, 69}, n in [1, 8] and m in [1, 2].
// Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7376
// ks(3) with n in [9, 15] and k in {1, 18, 35, 52, 69} (m = 2).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7394
// ks(3) with n in {16, 24} and k in {1, 18, 35, 52, 69} (m = 2).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7412
// cm_stride(11) with k in {1, 18, 35, 52, 69}, n in [1, 8] and m in [1, 2].
// Each run uses iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                  xnn_init_qs8_conv_minmax_fp32_neon_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
7433
// ks(3) and a_offset(163) at m = 2, n = 8; k takes the values 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_size)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
  }
}
7450
// ks(3) and a_offset(163) with k in {1, 18, 35, 52, 69}; zero_index takes the
// values 0 and 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t z_idx = 0; z_idx < 2; ++z_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(8).k(k_size)
          .ks(3)
          .a_offset(163)
          .zero_index(z_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
7470
// Single configuration (m = 2, n = 8, k = 16) with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
}
7484
// Full 2x8 tile at k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
7498
// Full 2x8 tile at k = 16 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
7512 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7513
7514
7515 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 2x8 tile with k fixed at 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
7528
// Full 2x8 tile at k = 16 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
7542
// Every (m, n) with n in [1, 8] and m in [1, 2] at k = 16; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(16)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7560
// m in [1, 2] with n = 8 and k = 16; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(m_val).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7576
// n in [1, 8] with m = 2 and k = 16; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_val).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7592
// Full 2x8 tile for each k in [1, 15].
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7607
// Each k in [1, 15] over every (m, n) with n in [1, 8] and m in [1, 2];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7627
// Full 2x8 tile for each k in [17, 31].
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7642
// Each k in [17, 31] over every (m, n) with n in [1, 8] and m in [1, 2];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7662
// Full 2x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7677
// k = 32, 48, ..., 160 over every (m, n) with n in [1, 8] and m in [1, 2];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7697
// n in [9, 15] with m = 2, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7714
// n in [9, 15] with m = 2 and cn_stride = 11, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7732
// n in [9, 15], k = 1, 18, ..., 69 and m in [1, 2]; one iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7752
// n = 16 and 24 with m = 2, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7769
// n = 16 and 24 with m = 2 and cn_stride = 11, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7787
// n = 16 and 24, k = 1, 18, ..., 69 and m in [1, 2]; one iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7807
// Full 2x8 tile for k = 1, 18, ..., 69 with ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7823
// k = 1, 18, ..., 69 over every (m, n) with n in [1, 8] and m in [1, 2],
// using ks = 3 and one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7844
// n in [9, 15] with m = 2 and ks = 3, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7862
// n = 16 and 24 with m = 2 and ks = 3, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7880
// Sweeps k = 1, 18, ..., 69 over every (m, n) with m in [1, 2] and n in [1, 8],
// using cm_stride = 11 and a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
7901
// Full 2x8 tile for k = 1, 18, ..., 69 with ks = 3 and a_offset = 163.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
7918
// Full 2x8 tile for k = 1, 18, ..., 69 with ks = 3 and a_offset = 163,
// repeated for zero_index 0 and 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(8).k(k_val)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
7938
// Full 2x8 tile at k = 16 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
7952
// Full 2x8 tile at k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
7966
// Full 2x8 tile at k = 16 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEON_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
7980 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7981
7982
7983 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 2x8 tile with k fixed at 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
7996
// Full 2x8 tile at k = 16 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
8010
// Every (m, n) with n in [1, 8] and m in [1, 2] at k = 16; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m_val).n(n_val).k(16)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8028
// m in [1, 2] with n = 8 and k = 16; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(m_val).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8044
// n in [1, 8] with m = 2 and k = 16; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n_val).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8060
// Full 2x8 tile for each k in [1, 15].
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8075
// Each k in [1, 15] over every (m, n) with n in [1, 8] and m in [1, 2];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8095
// Full 2x8 tile for each k in [17, 31].
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8110
// Each k in [17, 31] over every (m, n) with n in [1, 8] and m in [1, 2];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 17; k_val < 32; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8130
// Full 2x8 tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8145
// k = 32, 48, ..., 160 over every (m, n) with n in [1, 8] and m in [1, 2];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 32; k_val <= 160; k_val += 16) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8165
// n in [9, 15] with m = 2, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8182
// n in [9, 15] with m = 2 and cn_stride = 11, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8200
// n in [9, 15], k = 1, 18, ..., 69 and m in [1, 2]; one iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8220
// n = 16 and 24 with m = 2, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8237
// n = 16 and 24 with m = 2 and cn_stride = 11, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8255
// n = 16 and 24, k = 1, 18, ..., 69 and m in [1, 2]; one iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8275
// Full 2x8 tile for k = 1, 18, ..., 69 with ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8291
// k = 1, 18, ..., 69 over every (m, n) with n in [1, 8] and m in [1, 2],
// using ks = 3 and one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8312
// n in [9, 15] with m = 2 and ks = 3, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8330
// n = 16 and 24 with m = 2 and ks = 3, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 80; k_val += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8348
// Sweeps k = 1, 18, ..., 69 over every (m, n) with m in [1, 2] and n in [1, 8],
// using cm_stride = 11 and a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(2).sr(1)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
8369
// Full 2x8 tile for k = 1, 18, ..., 69 with ks = 3 and a_offset = 163.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k_val)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
8386
// Full 2x8 tile for k = 1, 18, ..., 69 with ks = 3 and a_offset = 163,
// repeated for zero_index 0 and 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_val = 1; k_val <= 80; k_val += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(2).n(8).k(k_val)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8406
// Full 2x8 tile at k = 16 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
8420
// Full 2x8 tile at k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
8434
// Full 2x8 tile at k = 16 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2__NEONV8_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
8448 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8449
8450
8451 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 2x8 tile with k fixed at 16 (sr = 4 variant).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
8464
// Full 2x8 tile at k = 16 with cn_stride set to 11 (sr = 4 variant).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
8478
// Every (m, n) with n in [1, 8] and m in [1, 2] at k = 16; one iteration each
// (sr = 4 variant).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(4)
          .m(m_val).n(n_val).k(16)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
8496
// m in [1, 2] with n = 8 and k = 16; one iteration each (sr = 4 variant).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(m_val).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8512
// n in [1, 8] with m = 2 and k = 16; one iteration each (sr = 4 variant).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(4)
        .m(2).n(n_val).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
8528
// Runs m=2, n=8 for every k in [1, 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8543
// For every k in [1, 16), sweeps n over [1, 8] and m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
            .m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8563
// Runs m=2, n=8 for every k in [17, 32).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8578
// For every k in [17, 32), sweeps n over [1, 8] and m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
            .m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8598
// Runs m=2, n=8 for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8613
// For k = 32, 48, ..., 160, sweeps n over [1, 8] and m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
            .m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8633
// Sweeps n over [9, 16) with m=2, for k = 1, 18, ..., 69 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n).k(k);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8650
// Sweeps n over [9, 16) with m=2 and cn_stride set to 11, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n).k(k).cn_stride(11);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8668
// Sweeps n over [9, 16), k = 1, 18, ..., 69 and m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
            .m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8688
// Runs n = 16 and 24 with m=2, for k = 1, 18, ..., 69 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n).k(k);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8705
// Runs n = 16 and 24 with m=2 and cn_stride set to 11, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n).k(k).cn_stride(11);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8723
// Runs n = 16 and 24, k = 1, 18, ..., 69 and m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
            .m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8743
// Runs m=2, n=8 with ks set to 3, for k = 1, 18, ..., 69 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8759
// With ks set to 3: k = 1, 18, ..., 69, n over [1, 8], m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
            .m(m).n(n).k(k).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8780
// Sweeps n over [9, 16) with m=2 and ks set to 3, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n).k(k).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8798
// Runs n = 16 and 24 with m=2 and ks set to 3, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n).k(k).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8816
// With cm_stride set to 11: k = 1, 18, ..., 69, n over [1, 8], m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
            .m(m).n(n).k(k).cm_stride(11).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
8837
// Runs m=2, n=8 with ks set to 3 and a_offset set to 163, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k).ks(3).a_offset(163);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
8854
// Same setup as a_offset (ks=3, a_offset=163), additionally sweeping zero_index over 0 and 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
          .m(2).n(8).k(k).ks(3).a_offset(163).zero_index(mz);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
8874
// Single run with m=2, n=8, k=16 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16).qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
8888
// Single run with m=2, n=8, k=16 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16).qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
8902
// Single run with m=2, n=8, k=16 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16).cm_stride(11);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
8916 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8917
8918
8919 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m=2, n=8 and k=16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8932
// Single run with m=2, n=8, k=16 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16).cn_stride(11);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
8946
// Sweeps n over [1, 8] and m over [1, 2] at k=16, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
          .m(m).n(n).k(16).iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
8964
// Sweeps m over [1, 2] with n=8 and k=16, one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m = 1; m <= 2; ++m) {
    auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
        .m(m).n(8).k(16).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8980
// Sweeps n over [1, 8] with m=2 and k=16, one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 1; n <= 8; ++n) {
    auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
        .m(2).n(n).k(16).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
8996
// Runs m=2, n=8 for every k in [1, 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; ++k) {
    auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9011
// For every k in [1, 16), sweeps n over [1, 8] and m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
            .m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9031
// Runs m=2, n=8 for every k in [17, 32).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; ++k) {
    auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9046
// For every k in [17, 32), sweeps n over [1, 8] and m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
            .m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9066
// Runs m=2, n=8 for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9081
// For k = 32, 48, ..., 160, sweeps n over [1, 8] and m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
            .m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9101
// Sweeps n over [9, 16) with m=2, for k = 1, 18, ..., 69 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n).k(k);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9118
// Sweeps n over [9, 16) with m=2 and cn_stride set to 11, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n).k(k).cn_stride(11);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9136
// Sweeps n over [9, 16), k = 1, 18, ..., 69 and m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
            .m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9156
// Runs n = 16 and 24 with m=2, for k = 1, 18, ..., 69 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n).k(k);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9173
// Runs n = 16 and 24 with m=2 and cn_stride set to 11, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n).k(k).cn_stride(11);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9191
// Runs n = 16 and 24, k = 1, 18, ..., 69 and m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
            .m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9211
// Runs m=2, n=8 with ks set to 3, for k = 1, 18, ..., 69 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9227
// With ks set to 3: k = 1, 18, ..., 69, n over [1, 8], m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
            .m(m).n(n).k(k).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9248
// Sweeps n over [9, 16) with m=2 and ks set to 3, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n).k(k).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9266
// Runs n = 16 and 24 with m=2 and ks set to 3, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
          .m(2).n(n).k(k).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9284
// With cm_stride set to 11: k = 1, 18, ..., 69, n over [1, 8], m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
            .m(m).n(n).k(k).cm_stride(11).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9305
// Runs m=2, n=8 with ks set to 3 and a_offset set to 163, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
        .m(2).n(8).k(k).ks(3).a_offset(163);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
  }
}
9322
// Same setup as a_offset (ks=3, a_offset=163), additionally sweeping zero_index over 0 and 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
          .m(2).n(8).k(k).ks(3).a_offset(163).zero_index(mz);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
    }
  }
}
9342
// Single run with m=2, n=8, k=16 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16).qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
9356
// Single run with m=2, n=8, k=16 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16).qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
9370
// Single run with m=2, n=8, k=16 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C2S4__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(2).sr(4)
      .m(2).n(8).k(16).cm_stride(11);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2s4__neonv8_mlal, xnn_init_qs8_conv_minmax_fp32_neonv8_params, xnn_qs8_requantize_fp32);
}
9384 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9385
9386
9387 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m=2, n=8 and k=16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
9400
// Single run with m=2, n=8, k=16 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(16).cn_stride(11);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
}
9414
// Sweeps n over [1, 8] and m over [1, 2] at k=16, one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(4).sr(1)
          .m(m).n(n).k(16).iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
9432
// Sweeps m over [1, 2] with n=8 and k=16, one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 2; ++m) {
    auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(4).sr(1)
        .m(m).n(8).k(16).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9448
// Sweeps n over [1, 8] with m=2 and k=16, one iteration per value.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(4).sr(1)
        .m(2).n(n).k(16).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9464
// Runs m=2, n=8 for every k in [1, 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9479
// For every k in [1, 16), sweeps n over [1, 8] and m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9499
// Runs m=2, n=8 for every k in [17, 32).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9514
// For every k in [17, 32), sweeps n over [1, 8] and m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9534
// Runs m=2, n=8 for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(k);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
  }
}
9549
// For k = 32, 48, ..., 160, sweeps n over [1, 8] and m over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
9569
// Sweeps n over [9, 16) with m=2, for k = 1, 18, ..., 69 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n).k(k);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
9586
// Sweeps n over [9, 16) with m=2 and cn_stride set to 11, for k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      auto tester = GemmMicrokernelTester().mr(2).nr(8).kr(4).sr(1)
          .m(2).n(n).k(k).cn_stride(11);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_fp32_neon_params, xnn_qs8_requantize_fp32);
    }
  }
}
9604
// n in [9, 15], k = 1..80 (step 17), m in [1, 2]; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
9624
// n = 16 and n = 24 with m = 2; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
9641
// n = 16 and n = 24 with m = 2 and cn_stride set to 11; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
9659
// n = 16 and n = 24, k = 1..80 (step 17), m in [1, 2]; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
9679
// Full 2x8 tile with ks set to 3; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
9695
// ks set to 3 across every m in [1, 2] and n in [1, 8]; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
9716
// n in [9, 15] with m = 2 and ks set to 3; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
9734
// n = 16 and n = 24 with m = 2 and ks set to 3; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
9752
// cm_stride set to 11 across every m in [1, 2] and n in [1, 8]; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
9773
// Full 2x8 tile with ks set to 3 and a_offset set to 163; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
9790
// Same setup as the a_offset case, additionally passing zero_index 0 and 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
9810
// Full 2x8 tile at k = 16 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
9824
// Full 2x8 tile at k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
9838
// Full 2x8 tile at k = 16 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
9852 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9853
9854
9855 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 2x8 tile at k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
9868
// Full 2x8 tile at k = 16 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
9882
// Every n in [1, 8] and m in [1, 2] at k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
9900
// m in [1, 2] with n = 8 at k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
9916
// n in [1, 8] with m = 2 at k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
9932
// Full 2x8 tile for every k in [1, 15].
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
9947
// Every k in [1, 15], n in [1, 8] and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
9967
// Full 2x8 tile for every k in [17, 31].
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
9982
// Every k in [17, 31], n in [1, 8] and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10002
// Full 2x8 tile; k runs from 32 through 160 in increments of 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10017
// Every m in [1, 2] and n in [1, 8] for k = 32..160 (step 16), one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10037
// n in [9, 15] with m = 2; k takes the values 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10054
// n in [9, 15] with m = 2 and cn_stride set to 11; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10072
// n in [9, 15], k = 1..80 (step 17), m in [1, 2]; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10092
// n = 16 and n = 24 with m = 2; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10109
// n = 16 and n = 24 with m = 2 and cn_stride set to 11; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10127
// n = 16 and n = 24, k = 1..80 (step 17), m in [1, 2]; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10147
// Full 2x8 tile with ks set to 3; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10163
// ks set to 3 across every m in [1, 2] and n in [1, 8]; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10184
// n in [9, 15] with m = 2 and ks set to 3; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10202
// n = 16 and n = 24 with m = 2 and ks set to 3; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10220
// cm_stride set to 11 across every m in [1, 2] and n in [1, 8]; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_fp32_neon_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10241
// Full 2x8 tile with ks set to 3 and a_offset set to 163; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
  }
}
10258
// Same setup as the a_offset case, additionally passing zero_index 0 and 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10278
// Full 2x8 tile at k = 16 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10292
// Full 2x8 tile at k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10306
// Full 2x8 tile at k = 16 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEON_MLAL_LD1R, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
10320 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10321
10322
10323 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 2x8 tile at k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
10336
// Full 2x8 tile at k = 16 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
10350
// Every n in [1, 8] and m in [1, 2] at k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10368
// m in [1, 2] with n = 8 at k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
10384
// n in [1, 8] with m = 2 at k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
10400
// Full 2x8 tile for every k in [1, 15].
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
10415
// Every k in [1, 15], n in [1, 8] and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10435
// Full 2x8 tile for every k in [17, 31].
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
10450
// Every k in [17, 31], n in [1, 8] and m in [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10470
// Full 2x8 tile; k runs from 32 through 160 in increments of 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
10485
// Every m in [1, 2] and n in [1, 8] for k = 32..160 (step 16), one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10505
// n in [9, 15] with m = 2; k takes the values 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10522
// n in [9, 15] with m = 2 and cn_stride set to 11; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10540
// n in [9, 15], k = 1..80 (step 17), m in [1, 2]; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10560
// n = 16 and n = 24 with m = 2; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10577
// n = 16 and n = 24 with m = 2 and cn_stride set to 11; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
10595
// n = 16 and n = 24, k = 1..80 (step 17), m in [1, 2]; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
10615
// Full 2x8 tile with ks set to 3; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
10631
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over {1, 2} with
// ks(3) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10652
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10670
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10688
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over {1, 2} with
// cm_stride(11) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10709
// Sweeps k over {1, 18, 35, 52, 69} with m = 2, n = 8, ks(3) and
// a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
10726
// Sweeps k over {1, 18, 35, 52, 69} and zero_index over {0, 1} with m = 2,
// n = 8, ks(3) and a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10746
// Single run with m = 2, n = 8, k = 16 and qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
10760
// Single run with m = 2, n = 8, k = 16 and qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
10774
// Single run with m = 2, n = 8, k = 16 and cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_DUP, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_dup,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
10788 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10789
10790
10791 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 2, n = 8 and k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
10804
// Single run with m = 2, n = 8, k = 16 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
10818
// Sweeps n over 1..8 and m over {1, 2} at k = 16 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10836
// Sweeps m over {1, 2} at n = 8, k = 16 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
10852
// Sweeps n over 1..8 at m = 2, k = 16 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
10868
// Sweeps k over 1..15 with m = 2 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
10883
// Sweeps k over 1..15, n over 1..8 and m over {1, 2} with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10903
// Sweeps k over 17..31 with m = 2 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
10918
// Sweeps k over 17..31, n over 1..8 and m over {1, 2} with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10938
// Sweeps k over 32..160 in steps of 16 with m = 2 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
10953
// Sweeps k over 32..160 in steps of 16, n over 1..8 and m over {1, 2} with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
10973
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
10990
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 2 and
// cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11008
// Sweeps n over 9..15, k over {1, 18, 35, 52, 69} and m over {1, 2} with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11028
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11045
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2 and
// cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11063
// Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over {1, 2} with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11083
// Sweeps k over {1, 18, 35, 52, 69} with m = 2, n = 8 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11099
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over {1, 2} with
// ks(3) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11120
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11138
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11156
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over {1, 2} with
// cm_stride(11) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11177
// Sweeps k over {1, 18, 35, 52, 69} with m = 2, n = 8, ks(3) and
// a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(1)
      .m(2).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
11194
// Sweeps k over {1, 18, 35, 52, 69} and zero_index over {0, 1} with m = 2,
// n = 8, ks(3) and a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11214
// Single run with m = 2, n = 8, k = 16 and qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11228
// Single run with m = 2, n = 8, k = 16 and qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11242
// Single run with m = 2, n = 8, k = 16 and cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4__NEONV8_MLAL_LD2R, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(1)
    .m(2).n(8).k(16)
    .cm_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4__neonv8_mlal_ld2r,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
11256 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11257
11258
11259 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m = 2, n = 8 and k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(16)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11272
// Single run with m = 2, n = 8, k = 16 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(16)
    .cn_stride(11)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
11286
// Sweeps n over 1..8 and m over {1, 2} at k = 16 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(mc).n(nc).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11304
// Sweeps m over {1, 2} at n = 8, k = 16 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(mc).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11320
// Sweeps n over 1..8 at m = 2, k = 16 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(nc).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11336
// Sweeps k over 1..15 with m = 2 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11351
// Sweeps k over 1..15, n over 1..8 and m over {1, 2} with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11371
// Sweeps k over 17..31 with m = 2 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11386
// Sweeps k over 17..31, n over 1..8 and m over {1, 2} with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11406
// Sweeps k over 32..160 in steps of 16 with m = 2 and n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11421
// Sweeps k over 32..160 in steps of 16, n over 1..8 and m over {1, 2} with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11441
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11458
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 2 and
// cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11476
// Sweeps n over 9..15, k over {1, 18, 35, 52, 69} and m over {1, 2} with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11496
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11513
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2 and
// cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(nc).k(kc)
        .cn_stride(11)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11531
// Sweeps n over {16, 24}, k over {1, 18, 35, 52, 69} and m over {1, 2} with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11551
// Sweeps k over {1, 18, 35, 52, 69} with m = 2, n = 8 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11567
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over {1, 2} with
// ks(3) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11588
// Sweeps n over 9..15 and k over {1, 18, 35, 52, 69} with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11606
// Sweeps n over {16, 24} and k over {1, 18, 35, 52, 69} with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
11624
// Sweeps k over {1, 18, 35, 52, 69}, n over 1..8 and m over {1, 2} with
// cm_stride(11) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
11645
// Sweeps k over {1, 18, 35, 52, 69} with m = 2, n = 8, ks(3) and
// a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
11662
// Sweeps k over 1, 18, 35, 52, 69 and zero_index over 0..1 at m = 2, n = 8
// with ks(3) and a_offset(163) set.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t cur_mz = 0; cur_mz < 2; ++cur_mz) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(2).n(8).k(cur_k)
          .ks(3)
          .a_offset(163)
          .zero_index(cur_mz)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neon_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
11682
// Single run at m = 2, n = 8, k = 16 with qmin(128) set.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
11696
// Single run at m = 2, n = 8, k = 16 with qmax(128) set.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
11710
// Single run at m = 2, n = 8, k = 16 with cm_stride(11) set.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
}
11724 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11725
11726
11727 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run at m = 2, n = 8, k = 16 with no other tester options set.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(16)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
11740
// Single run at m = 2, n = 8, k = 16 with cn_stride(11) set.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
11754
// Runs k = 16 for every (m, n) with m in 1..2 and n in 1..8, one iteration
// per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(cur_m).n(cur_n).k(16)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
11772
// Runs k = 16 with n = 8 while m sweeps 1..2, one iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(cur_m).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
11788
// Runs k = 16 with m = 2 while n sweeps 1..8, one iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(cur_n).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
11804
// Sweeps k over 1..15 at m = 2, n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 1; cur_k <= 15; ++cur_k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(8).k(cur_k)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
11819
// Sweeps k over 1..15 and every (m, n) with m in 1..2 and n in 1..8, one
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 1; cur_k <= 15; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(2)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
11839
// Sweeps k over 17..31 at m = 2, n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 17; cur_k <= 31; ++cur_k) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(8).k(cur_k)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
11854
// Sweeps k over 17..31 and every (m, n) with m in 1..2 and n in 1..8, one
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 17; cur_k <= 31; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(2)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
11874
// Sweeps k over the multiples of 16 from 32 through 160 at m = 2, n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(8).k(cur_k)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
11889
// Sweeps k over the multiples of 16 from 32 through 160 and every (m, n)
// with m in 1..2 and n in 1..8, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(2)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
11909
// Sweeps n over 9..15 (above nr = 8) and k over 1, 18, 35, 52, 69 with m
// fixed at mr = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(2).n(cur_n).k(cur_k)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
11926
// Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 with cn_stride(11) set
// and m fixed at mr = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(2).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
11944
// Sweeps n over 9..15, k over 1, 18, 35, 52, 69 and m over 1..2, one
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(2)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
11964
// Sweeps n over 16 and 24 (multiples of nr = 8) and k over 1, 18, 35, 52, 69
// with m fixed at mr = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(2).n(cur_n).k(cur_k)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
11981
// Sweeps n over 16 and 24 and k over 1, 18, 35, 52, 69 with cn_stride(11)
// set and m fixed at mr = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(2).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
11999
// Sweeps n over 16 and 24, k over 1, 18, 35, 52, 69 and m over 1..2, one
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(2)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12019
// Sweeps k over 1, 18, 35, 52, 69 at m = 2, n = 8 with ks(3) set.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(8).k(cur_k)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12035
// Sweeps k over 1, 18, 35, 52, 69 and every (m, n) with m in 1..2 and
// n in 1..8, with ks(3) set and one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(2)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12056
// Sweeps n over 9..15 (above nr = 8) and k over 1, 18, 35, 52, 69 with
// ks(3) set and m fixed at mr = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(2).n(cur_n).k(cur_k)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12074
// Sweeps n over 16 and 24 (multiples of nr = 8) and k over 1, 18, 35, 52, 69
// with ks(3) set and m fixed at mr = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(2).n(cur_n).k(cur_k)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12092
// Sweeps k over 1, 18, 35, 52, 69 and every (m, n) with m in 1..2 and
// n in 1..8, using cm_stride(11) and a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 2; ++cur_m) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(4).sr(2)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12113
// Sweeps k over 1, 18, 35, 52, 69 at m = 2, n = 8 with ks(3) and
// a_offset(163) set.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(8).k(cur_k)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12130
// Sweeps k over 1, 18, 35, 52, 69 and zero_index over 0..1 at m = 2, n = 8
// with ks(3) and a_offset(163) set.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t cur_mz = 0; cur_mz < 2; ++cur_mz) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(2).n(8).k(cur_k)
          .ks(3)
          .a_offset(163)
          .zero_index(cur_mz)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12150
// Single run at m = 2, n = 8, k = 16 with qmin(128) set.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12164
// Single run at m = 2, n = 8, k = 16 with qmax(128) set.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12178
// Single run at m = 2, n = 8, k = 16 with cm_stride(11) set.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C4S2__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c4s2__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12192 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12193
12194
12195 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run at m = 1, n = 8, k = 16 with no other tester options set.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12208
// Single run at m = 1, n = 8, k = 16 with cn_stride(11) set.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12222
// Runs k = 16 for n in 1..8; the m loop covers only m = 1 because mr = 1.
// One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(cur_m).n(cur_n).k(16)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12240
// Runs k = 16 with n = 8; the m loop covers only m = 1 because mr = 1.
// One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(cur_m).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12256
// Runs k = 16 with m = 1 while n sweeps 1..8, one iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(cur_n).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12272
// Sweeps k over 1..15 at m = 1, n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 15; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(cur_k)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12287
// Sweeps k over 1..15 and n over 1..8; the m loop covers only m = 1 because
// mr = 1. One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 15; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12307
// Sweeps k over 17..31 at m = 1, n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 17; cur_k <= 31; ++cur_k) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(cur_k)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12322
// Sweeps k over 17..31 and n over 1..8; the m loop covers only m = 1 because
// mr = 1. One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 17; cur_k <= 31; ++cur_k) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12342
// Sweeps k over the multiples of 16 from 32 through 160 at m = 1, n = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(cur_k)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12357
// Sweeps k over the multiples of 16 from 32 through 160 and n over 1..8; the
// m loop covers only m = 1 because mr = 1. One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 32; cur_k <= 160; cur_k += 16) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12377
// Sweeps n over 9..15 (above nr = 8) and k over 1, 18, 35, 52, 69 with m
// fixed at mr = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12394
// Sweeps n over 9..15 and k over 1, 18, 35, 52, 69 with cn_stride(11) set
// and m fixed at mr = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12412
// Sweeps n over 9..15 and k over 1, 18, 35, 52, 69; the m loop covers only
// m = 1 because mr = 1. One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12432
// Sweeps n over 16 and 24 (multiples of nr = 8) and k over 1, 18, 35, 52, 69
// with m fixed at mr = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12449
// Sweeps n over 16 and 24 and k over 1, 18, 35, 52, 69 with cn_stride(11)
// set and m fixed at mr = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12467
// Sweeps n over 16 and 24 and k over 1, 18, 35, 52, 69; the m loop covers
// only m = 1 because mr = 1. One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12487
// Sweeps k over 1, 18, 35, 52, 69 at m = 1, n = 8 with ks(3) set.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(cur_k)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12503
// Sweeps k over 1, 18, 35, 52, 69 and n over 1..8 with ks(3) set; the m loop
// covers only m = 1 because mr = 1. One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12524
// Sweeps n over 9..15 (above nr = 8) and k over 1, 18, 35, 52, 69 with
// ks(3) set and m fixed at mr = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 9; cur_n <= 15; ++cur_n) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12542
// Sweeps n over 16 and 24 (multiples of nr = 8) and k over 1, 18, 35, 52, 69
// with ks(3) set and m fixed at mr = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cur_n = 16; cur_n <= 24; cur_n += 8) {
    for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(cur_n).k(cur_k)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12560
// Sweeps k over 1, 18, 35, 52, 69 and n over 1..8 with cm_stride(11) set;
// the m loop covers only m = 1 because mr = 1. One iteration per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t cur_n = 1; cur_n <= 8; ++cur_n) {
      for (uint32_t cur_m = 1; cur_m <= 1; ++cur_m) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(8).sr(1)
            .m(cur_m).n(cur_n).k(cur_k)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12581
// Sweeps k over 1, 18, 35, 52, 69 at m = 1, n = 8 with ks(3) and
// a_offset(83) set.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(8).sr(1)
        .m(1).n(8).k(cur_k)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12598
// Sweeps k over 1, 18, 35, 52, 69 at m = 1, n = 8 with ks(3) and
// a_offset(83) set; the zero_index loop covers only index 0 because mr = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t cur_k = 1; cur_k <= 80; cur_k += 17) {
    for (uint32_t cur_mz = 0; cur_mz < 1; ++cur_mz) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(8).sr(1)
          .m(1).n(8).k(cur_k)
          .ks(3)
          .a_offset(83)
          .zero_index(cur_mz)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12618
// Single run at m = 1, n = 8, k = 16 with qmin(128) set.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12632
// Single run at m = 1, n = 8, k = 16 with qmax(128) set.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12646
// Single run of the 1x8 tile at k = 16 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(1).nr(8).kr(8).sr(1)
      .m(1).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12660 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
12661
12662
12663 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m = 2, n = 8, k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(16)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12676
// Single run with m = 2, n = 8, k = 16 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(16)
      .cn_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
12690
// Every (n, m) with n in [1, 8] and m in [1, 2] at k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(mc).n(nc).k(16)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12708
// Varies m over [1, 2] with n = 8 and k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(mc).n(8).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12724
// Varies n over [1, 8] with m = 2 and k = 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(nc).k(16)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12740
// Full 2x8 tile for every k in [1, 15].
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12755
// Every (k, n, m) with k in [1, 15], n in [1, 8], m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(16).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12775
// Full 2x8 tile for every k in [17, 31].
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12790
// Every (k, n, m) with k in [17, 31], n in [1, 8], m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(16).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12810
// Full 2x8 tile for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(8).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12825
// Every (k, n, m) with k = 32, 48, ..., 160, n in [1, 8], m in [1, 2];
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(16).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12845
// For n in [9, 15] and k in {1, 18, 35, 52, 69}: runs with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12862
// For n in [9, 15] and k in {1, 18, 35, 52, 69}: runs with m = 2 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12880
// For n in [9, 15], k in {1, 18, 35, 52, 69} and m in [1, 2]: one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(16).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12900
// For n in {16, 24} and k in {1, 18, 35, 52, 69}: runs with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12917
// For n in {16, 24} and k in {1, 18, 35, 52, 69}: runs with m = 2 and cn_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
12935
// For n in {16, 24}, k in {1, 18, 35, 52, 69} and m in [1, 2]: one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(16).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12955
// For k in {1, 18, 35, 52, 69}: runs the full 2x8 tile with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
12971
// For k in {1, 18, 35, 52, 69}, n in [1, 8] and m in [1, 2]: runs with ks(3),
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(16).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
12992
// For n in [9, 15] and k in {1, 18, 35, 52, 69}: runs with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13010
// For n in {16, 24} and k in {1, 18, 35, 52, 69}: runs with m = 2 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13028
// For k in {1, 18, 35, 52, 69}, n in [1, 8] and m in [1, 2]: runs with
// cm_stride(11), one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(8).kr(16).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13049
// For k in {1, 18, 35, 52, 69}: runs the 2x8 tile with ks(3) and a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(8).kr(16).sr(1)
        .m(2).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13066
// For k in {1, 18, 35, 52, 69} and every zero_index in [0, 2): runs the 2x8
// tile with ks(3) and a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(8).kr(16).sr(1)
          .m(2).n(8).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13086
// Single run of the 2x8 tile at k = 16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(16)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
13100
// Single run of the 2x8 tile at k = 16 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(16)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
13114
// Single run of the 2x8 tile at k = 16 with cm_stride(11).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C16__AARCH64_NEON_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(2).nr(8).kr(16).sr(1)
      .m(2).n(8).k(16)
      .cm_stride(11)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c16__aarch64_neon_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
13128 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
13129
13130
13131 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single run with m = 4, n = 16, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
13144
// Single run with m = 4, n = 16, k = 8 and cn_stride(19).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cn_stride(19)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
13158
// Every (n, m) with n in [1, 16] and m in [1, 4] at k = 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13176
// Varies m over [1, 4] with n = 16 and k = 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(mc).n(16).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13192
// Varies n over [1, 16] with m = 4 and k = 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13208
// Full 4x16 tile for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13223
// Every (k, n, m) with k in [1, 7], n in [1, 16], m in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13243
// Full 4x16 tile for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13258
// Every (k, n, m) with k in [9, 15], n in [1, 16], m in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13278
// Full 4x16 tile for k = 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13293
// Every (k, n, m) with k = 16, 24, ..., 80, n in [1, 16], m in [1, 4];
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13313
// For n in [17, 31] and k in {1, 10, 19, 28, 37}: runs with m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13330
// For n in [17, 31] and k in {1, 10, 19, 28, 37}: runs with m = 4 and cn_stride(19).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(19)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13348
// For n in [17, 31], k in {1, 10, 19, 28, 37} and m in [1, 4]: one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13368
// For n in {32, 48} and k in {1, 10, 19, 28, 37}: runs with m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13385
// For n in {32, 48} and k in {1, 10, 19, 28, 37}: runs with m = 4 and cn_stride(19).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(19)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13403
// For n in {32, 48}, k in {1, 10, 19, 28, 37} and m in [1, 4]: one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13423
// For k in {1, 10, 19, 28, 37}: runs the full 4x16 tile with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kc)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13439
// For k in {1, 10, 19, 28, 37}, n in [1, 16] and m in [1, 4]: runs with ks(3),
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13460
// For n in [17, 31] and k in {1, 10, 19, 28, 37}: runs with m = 4 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13478
// For n in {32, 48} and k in {1, 10, 19, 28, 37}: runs with m = 4 and ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13496
// For k in {1, 10, 19, 28, 37}, n in [1, 16] and m in [1, 4]: runs with
// cm_stride(19), one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(19)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
                xnn_init_qs8_conv_minmax_fp32_neonv8_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
13517
// For k in {1, 10, 19, 28, 37}: runs the 4x16 tile with ks(3) and a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
  }
}
13534
// For k in {1, 10, 19, 28, 37} and every zero_index in [0, 4): runs the 4x16
// tile with ks(3) and a_offset(163).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(16).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
              xnn_init_qs8_conv_minmax_fp32_neonv8_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
13554
// Single run of the 4x16 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
13568
// Single run of the 4x16 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
13582
// Single run of the 4x16 tile at k = 8 with cm_stride(19).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(8)
      .cm_stride(19)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
}
13596 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
13597
13598
13599 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
// Single full tile (m = 4, n = 16) with k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(16);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13612
// Full tile (m = 4, n = 16) at k = 16 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(16);
  tester.cn_stride(19);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
13626
// Sweeps n over [1, 16] and m over [1, 4] at k = 16, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(rows).n(cols).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13644
// Sweeps m over [1, 4] with n = 16 and k = 16, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(rows).n(16).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13660
// Sweeps n over [1, 16] with m = 4 and k = 16, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(cols).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13676
// Full tile (m = 4, n = 16) for every k in [1, 15].
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13691
// For every k in [1, 15], sweeps n over [1, 16] and m over [1, 4], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13711
// Full tile (m = 4, n = 16) for every k in [17, 31].
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 17; depth < 32; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13726
// For every k in [17, 31], sweeps n over [1, 16] and m over [1, 4], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13746
// Full tile (m = 4, n = 16) for k = 32, 48, ..., 160 (step 16).
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13761
// For k = 32, 48, ..., 160, sweeps n over [1, 16] and m over [1, 4], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13781
// m = 4 with n in [17, 31]; k takes 1, 18, ..., up to 80 (step 17).
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13798
// m = 4 with n in [17, 31] and k stepping by 17 up to 80, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(19);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13816
// n in [17, 31], k stepping by 17 up to 80, m over [1, 4]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13836
// m = 4 with n = 32 and 48; k stepping by 17 from 1 up to 80.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13853
// m = 4 with n = 32 and 48, k stepping by 17 up to 80, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.cn_stride(19);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13871
// n = 32 and 48, k stepping by 17 up to 80, m over [1, 4]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13891
// Full tile (m = 4, n = 16) with ks = 3; k stepping by 17 from 1 up to 80.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
13907
// ks = 3 with k stepping by 17 up to 80, n over [1, 16], m over [1, 4]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13928
// ks = 3 with m = 4, n in [17, 31], and k stepping by 17 from 1 up to 80.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13946
// ks = 3 with m = 4, n = 32 and 48, and k stepping by 17 from 1 up to 80.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
13964
// cm_stride = 19 with k stepping by 17 up to 80, n over [1, 16], m over [1, 4]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(19);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
13985
// Full tile (m = 4, n = 16) with ks = 3 and a_offset = 331; k stepping by 17 up to 80.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(depth);
    tester.ks(3).a_offset(331);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14002
// Same setup as the a_offset case (ks = 3, a_offset = 331), additionally
// sweeping zero_index over [0, 3] for each k.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(4).sr(1);
      tester.m(4).n(16).k(depth);
      tester.ks(3).a_offset(331).zero_index(zero_row);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14022
// Full tile (m = 4, n = 16) at k = 16 with the tester's qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(16);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14036
// Full tile (m = 4, n = 16) at k = 16 with the tester's qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(16);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14050
// Full tile (m = 4, n = 16) at k = 16 with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(4).sr(1);
  tester.m(4).n(16).k(16);
  tester.cm_stride(19);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16c4__aarch64_neondot_cortex_a55,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14064 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
14065
14066
14067 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
// Single full tile (m = 1, n = 8) with k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(4).sr(1);
  tester.m(1).n(8).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14080
// Full tile (m = 1, n = 8) at k = 8 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(4).sr(1);
  tester.m(1).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14094
// Sweeps n over [1, 8] (m loop covers only m = 1) at k = 8, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(rows).n(cols).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14112
// m loop (covers only m = 1) with n = 8 and k = 8, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(rows).n(8).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14128
// Sweeps n over [1, 8] with m = 1 and k = 8, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(1).n(cols).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14144
// Full tile (m = 1, n = 8) for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_lt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14159
// For every k in [1, 7], sweeps n over [1, 8] (m loop covers only m = 1), one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14179
// Full tile (m = 1, n = 8) for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14194
// For every k in [9, 15], sweeps n over [1, 8] (m loop covers only m = 1), one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14214
// Full tile (m = 1, n = 8) for k = 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(1).n(8).k(depth);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14229
// For k = 16, 24, ..., 80, sweeps n over [1, 8] (m loop covers only m = 1), one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14249
// m = 1 with n in [9, 15]; k stepping by 9 from 1 up to 40.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14266
// m = 1 with n in [9, 15] and k stepping by 9 up to 40, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14284
// n in [9, 15], k stepping by 9 up to 40, m loop covering only m = 1; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14304
// m = 1 with n = 16 and 24; k stepping by 9 from 1 up to 40.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14321
// m = 1 with n = 16 and 24, k stepping by 9 up to 40, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14339
// n = 16 and 24, k stepping by 9 up to 40, m loop covering only m = 1; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14359
// Full tile (m = 1, n = 8) with ks = 3; k stepping by 9 from 1 up to 40.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(1).n(8).k(depth);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14375
// ks = 3 with k stepping by 9 up to 40, n over [1, 8], m loop covering only m = 1; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14396
// ks = 3 with m = 1, n in [9, 15], and k stepping by 9 from 1 up to 40.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14414
// ks = 3 with m = 1, n = 16 and 24, and k stepping by 9 from 1 up to 40.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(cols).k(depth);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14432
// cm_stride = 11 with k stepping by 9 up to 40, n over [1, 8], m loop covering only m = 1; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(4).sr(1);
        tester.m(rows).n(cols).k(depth);
        tester.cm_stride(11);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14453
// Full tile (m = 1, n = 8) with ks = 3 and a_offset = 43; k stepping by 9 up to 40.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(4).sr(1);
    tester.m(1).n(8).k(depth);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14470
// Same setup as the a_offset case (ks = 3, a_offset = 43), with a
// zero_index loop that covers only index 0 for each k.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_row = 0; zero_row < 1; ++zero_row) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(4).sr(1);
      tester.m(1).n(8).k(depth);
      tester.ks(3).a_offset(43).zero_index(zero_row);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14490
// Full tile (m = 1, n = 8) at k = 8 with the tester's qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, qmin) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(4).sr(1);
  tester.m(1).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14504
// Full tile (m = 1, n = 8) at k = 8 with the tester's qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, qmax) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(4).sr(1);
  tester.m(1).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14518
// Full tile (m = 1, n = 8) at k = 8 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C4__NEONDOT, strided_cm) {
  TEST_REQUIRES_ARM_NEON_DOT;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(4).sr(1);
  tester.m(1).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c4__neondot,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14532 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64)
14533
14534
14535 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single full tile (m = 1, n = 8) with k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(16);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14548
// Full tile (m = 1, n = 8) at k = 16 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(16);
  tester.cn_stride(11);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14562
// Sweeps n over [1, 8] (m loop covers only m = 1) at k = 16, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(rows).n(cols).k(16);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14580
// m loop (covers only m = 1) with n = 8 and k = 16, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(rows).n(8).k(16);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14596
// k = 16, m = 1: sweeps n over 1..8, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(n_dim).k(16).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14612
// Full 1x8 shape with every k below 16 (1..15).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_dim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14627
// k in 1..15, n in 1..8, m in 1..1; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14647
// Full 1x8 shape with k ranging over 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_dim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14662
// k in 17..31, n in 1..8, m in 1..1; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14682
// Full 1x8 shape with k stepping from 32 to 160 in increments of 16.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_dim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14697
// k from 32 to 160 in steps of 16, n in 1..8, m in 1..1; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14717
// m = 1 with n in 9..15 and k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14734
// m = 1, n in 9..15, k from 1 to 80 in steps of 17, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim).cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14752
// n in 9..15, k from 1 to 80 in steps of 17, m in 1..1; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14772
// m = 1 with n in {16, 24} and k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14789
// m = 1, n in {16, 24}, k from 1 to 80 in steps of 17, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim).cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14807
// n in {16, 24}, k from 1 to 80 in steps of 17, m in 1..1; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14827
// Full 1x8 shape with ks = 3 and k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_dim).ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14843
// ks = 3; k from 1 to 80 in steps of 17, n in 1..8, m in 1..1; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14864
// ks = 3, m = 1; n in 9..15 and k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14882
// ks = 3, m = 1; n in {16, 24} and k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_dim).k(k_dim).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14900
// cm_stride = 11; k from 1 to 80 in steps of 17, n in 1..8, m in 1..1; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(11).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
14921
// Full 1x8 shape with ks = 3 and a_offset = 83; k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_dim).ks(3).a_offset(83);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
14938
// ks = 3, a_offset = 83; k from 1 to 80 in steps of 17, zero_index over 0..0.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(8).k(k_dim).ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
14958
// Single run on a 1x8 problem with k = 16 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(16).qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14972
// Single run on a 1x8 problem with k = 16 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(16).qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
14986
// Single run on a 1x8 problem with k = 16 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(16).cm_stride(11);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__neonv8_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
15000 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15001
15002
15003 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run on a 2x8 problem with k = 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);  // kernel tile parameters
  tester.m(2).n(8).k(16);          // problem shape
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
15016
// Single run on a 2x8 problem with k = 16 and cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);         // kernel tile parameters
  tester.m(2).n(8).k(16).cn_stride(11);   // problem shape and output stride
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
15030
// k = 16: sweeps n over 1..8 and m over 1..2, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(m_dim).n(n_dim).k(16).iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15048
// k = 16, n = 8: sweeps m over 1..2, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(m_dim).n(8).k(16).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
15064
// k = 16, m = 2: sweeps n over 1..8, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(n_dim).k(16).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
15080
// Full 2x8 shape with every k below 16 (1..15).
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_dim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
15095
// k in 1..15, n in 1..8, m in 1..2; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15115
// Full 2x8 shape with k ranging over 17..31.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_dim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
15130
// k in 17..31, n in 1..8, m in 1..2; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 17; k_dim < 32; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15150
// Full 2x8 shape with k stepping from 32 to 160 in increments of 16.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_dim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
15165
// k from 32 to 160 in steps of 16, n in 1..8, m in 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 32; k_dim <= 160; k_dim += 16) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15185
// m = 2 with n in 9..15 and k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15202
// m = 2, n in 9..15, k from 1 to 80 in steps of 17, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim).cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15220
// n in 9..15, k from 1 to 80 in steps of 17, m in 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15240
// m = 2 with n in {16, 24} and k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15257
// m = 2, n in {16, 24}, k from 1 to 80 in steps of 17, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim).cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15275
// n in {16, 24}, k from 1 to 80 in steps of 17, m in 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15295
// Full 2x8 shape with ks = 3 and k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_dim).ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
15311
// ks = 3; k from 1 to 80 in steps of 17, n in 1..8, m in 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15332
// ks = 3, m = 2; n in 9..15 and k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 9; n_dim < 16; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15350
// ks = 3, m = 2; n in {16, 24} and k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (uint32_t n_dim = 16; n_dim <= 24; n_dim += 8) {
    for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(n_dim).k(k_dim).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15368
// cm_stride = 11; k from 1 to 80 in steps of 17, n in 1..8, m in 1..2; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t n_dim = 1; n_dim <= 8; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(8).kr(8).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).cm_stride(11).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
            xnn_init_qs8_conv_minmax_fp32_neonv8_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15389
// Full 2x8 shape with ks = 3 and a_offset = 163; k from 1 to 80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(8).kr(8).sr(1);
    tester.m(2).n(8).k(k_dim).ks(3).a_offset(163);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
        xnn_init_qs8_conv_minmax_fp32_neonv8_params,
        xnn_qs8_requantize_fp32);
  }
}
15406
// ks = 3, a_offset = 163; k from 1 to 80 in steps of 17, zero_index over 0..1.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON_V8;
  for (size_t k_dim = 1; k_dim <= 80; k_dim += 17) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(8).kr(8).sr(1);
      tester.m(2).n(8).k(k_dim).ks(3).a_offset(163).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
          xnn_init_qs8_conv_minmax_fp32_neonv8_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15426
// Single run on a 2x8 problem with k = 16 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16).qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
15440
// Single run on a 2x8 problem with k = 16 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16).qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
15454
// Single run on a 2x8 problem with k = 16 and cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__NEONV8_MLAL, strided_cm) {
  TEST_REQUIRES_ARM_NEON_V8;
  GemmMicrokernelTester tester;
  tester.mr(2).nr(8).kr(8).sr(1);
  tester.m(2).n(8).k(16).cm_stride(11);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__neonv8_mlal,
      xnn_init_qs8_conv_minmax_fp32_neonv8_params,
      xnn_qs8_requantize_fp32);
}
15468 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15469
15470
15471 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run on a 4x16 problem with k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(1).sr(1);  // kernel tile parameters
  tester.m(4).n(16).k(8);           // problem shape
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15484
// Single run on a 4x16 problem with k = 8 and cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester tester;
  tester.mr(4).nr(16).kr(1).sr(1);        // kernel tile parameters
  tester.m(4).n(16).k(8).cn_stride(19);   // problem shape and output stride
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15498
// k = 8: sweeps n over 1..16 and m over 1..4, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(16).kr(1).sr(1);
      tester.m(m_dim).n(n_dim).k(8).iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15516
// k = 8, n = 16: sweeps m over 1..4, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(m_dim).n(16).k(8).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15532
// k = 8, m = 4: sweeps n over 1..16, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(n_dim).k(8).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15548
// Full 4x16 shape with every k below 8 (1..7).
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_dim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15563
// k in 1..7, n in 1..16, m in 1..4; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15583
// Full 4x16 shape with k ranging over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_dim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15598
// k in 9..15, n in 1..16, m in 1..4; one iteration per combination.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(1).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15618
// Full 4x16 shape with k stepping from 16 to 80 in increments of 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(1).sr(1);
    tester.m(4).n(16).k(k_dim);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15633
// Runs every (m, n) with m in [1, 4] and n in [1, 16] for k = 16, 24, ..., 80;
// iterations is set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15653
// Runs m=4 with each n in [17, 31] and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15670
// Same sweep as n in [17, 31] x k in {1, 10, 19, 28, 37} at m=4, but with
// cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15688
// Runs each n in [17, 31], k in {1, 10, 19, 28, 37} and m in [1, 4];
// iterations is set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15708
// Runs m=4 with n in {32, 48} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15725
// Runs m=4 with n in {32, 48} and k in {1, 10, 19, 28, 37}, with cn_stride
// set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .cn_stride(19)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15743
// Runs n in {32, 48}, k in {1, 10, 19, 28, 37} and m in [1, 4];
// iterations is set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15763
// Runs the full 4x16 tile with ks set to 3 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_val)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15779
// Runs every (m, n) with m in [1, 4] and n in [1, 16] for k in
// {1, 10, 19, 28, 37}, with ks set to 3 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15800
// Runs m=4 with each n in [17, 31] and k in {1, 10, 19, 28, 37}, with ks
// set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 17; n_val <= 31; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15818
// Runs m=4 with n in {32, 48} and k in {1, 10, 19, 28, 37}, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 32; n_val <= 48; n_val += 16) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15836
// Runs every (m, n) with m in [1, 4] and n in [1, 16] for k in
// {1, 10, 19, 28, 37}, with cm_stride set to 19 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
            xnn_init_qs8_conv_minmax_fp32_neon_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
15857
// Runs the full 4x16 tile with ks set to 3 and a_offset set to 163 for k in
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(1).sr(1)
      .m(4).n(16).k(k_val)
      .ks(3)
      .a_offset(163)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
        xnn_init_qs8_conv_minmax_fp32_neon_params,
        xnn_qs8_requantize_fp32);
  }
}
15874
// Runs the full 4x16 tile with ks set to 3 and a_offset set to 163, trying
// each zero_index in [0, 3] for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_val)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
          xnn_init_qs8_conv_minmax_fp32_neon_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15894
// Runs the full 4x16 tile at k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15908
// Runs the full 4x16 tile at k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15922
// Runs the full 4x16 tile at k=8 with cm_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_4X16__NEON_MLAL_LANE, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(1).sr(1)
    .m(4).n(16).k(8)
    .cm_stride(19)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x16__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_fp32_neon_params,
      xnn_qs8_requantize_fp32);
}
15936 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15937
15938
15939 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the full 3x4 tile (m=3, n=4) of the SSE2 ld64 kernel at k=8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
15952
// Runs the full 3x4 tile at k=8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
15966
// Runs every (m, n) with n in [1, 4] and m in [1, 3] at k=8; iterations is
// set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
15984
// Runs each m in [1, 3] with n=4 at k=8; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
16000
// Runs each n in [1, 4] with m=3 at k=8; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(n_val).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
16016
// Runs the full 3x4 tile for each k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
16031
// Runs every (m, n) with m in [1, 3] and n in [1, 4] for each k in [1, 7];
// iterations is set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16051
// Runs the full 3x4 tile for each k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
16066
// Runs every (m, n) with m in [1, 3] and n in [1, 4] for each k in [9, 15];
// iterations is set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16086
// Runs the full 3x4 tile for k = 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
16101
// Runs every (m, n) with m in [1, 3] and n in [1, 4] for k = 16, 24, ..., 80;
// iterations is set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16121
// Runs m=3 with each n in [5, 7] and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16138
// Runs m=3 with each n in [5, 7] and k in {1, 10, 19, 28, 37}, with cn_stride
// set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16156
// Runs each n in [5, 7], k in {1, 10, 19, 28, 37} and m in [1, 3];
// iterations is set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16176
// Runs m=3 with n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16193
// Runs m=3 with n in {8, 12} and k in {1, 10, 19, 28, 37}, with cn_stride
// set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16211
// Runs n in {8, 12}, k in {1, 10, 19, 28, 37} and m in [1, 3];
// iterations is set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16231
// Runs the full 3x4 tile with ks set to 3 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
16247
// Runs every (m, n) with m in [1, 3] and n in [1, 4] for k in
// {1, 10, 19, 28, 37}, with ks set to 3 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16268
// Runs m=3 with each n in [5, 7] and k in {1, 10, 19, 28, 37}, with ks
// set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16286
// Runs m=3 with n in {8, 12} and k in {1, 10, 19, 28, 37}, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_val).k(k_val)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16304
// Runs every (m, n) with m in [1, 3] and n in [1, 4] for k in
// {1, 10, 19, 28, 37}, with cm_stride set to 7 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16325
// Runs the full 3x4 tile with ks set to 3 and a_offset set to 127 for k in
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
16342
// Runs the full 3x4 tile with ks set to 3 and a_offset set to 127, trying
// each zero_index in [0, 2] for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_val)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16362
// Runs the full 3x4 tile at k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
16376
// Runs the full 3x4 tile at k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
16390
// Runs the full 3x4 tile at k=8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
16404 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16405
16406
16407 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the full 3x4 tile (m=3, n=4) of the SSE4.1 ld64 kernel at k=8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16420
// Runs the full 3x4 tile at k=8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16434
// Runs every (m, n) with n in [1, 4] and m in [1, 3] at k=8; iterations is
// set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m_val).n(n_val).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16452
// Runs each m in [1, 3] with n=4 at k=8; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(m_val).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16468
// Runs each n in [1, 4] with m=3 at k=8; iterations is set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(n_val).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16484
// Runs the full 3x4 tile for each k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16499
// Runs every (m, n) with m in [1, 3] and n in [1, 4] for each k in [1, 7];
// iterations is set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 7; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16519
// Runs the full 3x4 tile for each k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16534
// Runs every (m, n) with m in [1, 3] and n in [1, 4] for each k in [9, 15];
// iterations is set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val <= 15; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16554
// Runs the full 3x4 tile for k = 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_val)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16569
// Runs every (m, n) with m in [1, 3] and n in [1, 4] for k = 16, 24, ..., 80;
// iterations is set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16589
// Runs m=3 with each n in [5, 7] and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16606
// Runs m=3 with each n in [5, 7] and k in {1, 10, 19, 28, 37}, with cn_stride
// set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16624
// Runs each n in [5, 7], k in {1, 10, 19, 28, 37} and m in [1, 3];
// iterations is set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val <= 7; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16644
// Runs m=3 with n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_val).k(k_val)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16661
// Runs m=3 with n in {8, 12} and k in {1, 10, 19, 28, 37}, with cn_stride
// set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_val).k(k_val)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16679
// n_div_4_subtile: for n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1..3, runs the
// tester with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16699
// small_kernel: for k = 1, 10, 19, 28, 37, runs the tester at m = 3, n = 4
// with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16715
// small_kernel_subtile: for k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..3, runs
// the tester with ks(3) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16736
// n_gt_4_small_kernel: for n = 5, 6, 7 and k = 1, 10, 19, 28, 37, runs the
// tester at m = 3 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16754
// n_div_4_small_kernel: for n = 8, 12 and k = 1, 10, 19, 28, 37, runs the
// tester at m = 3 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16772
// strided_cm_subtile: for k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..3, runs
// the tester with cm_stride(7) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16793
// a_offset: for k = 1, 10, 19, 28, 37, runs the tester at m = 3, n = 4 with
// ks(3) and a_offset(127).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16810
// zero: for k = 1, 10, 19, 28, 37 and zero_index = 0, 1, 2, runs the tester at
// m = 3, n = 4 with ks(3) and a_offset(127).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16830
// qmin: single run at m = 3, n = 4, k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16844
// qmax: single run at m = 3, n = 4, k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16858
// strided_cm: single run at m = 3, n = 4, k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16872 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
16873
16874
16875 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run with m and n equal to mr and nr (2 and 4) at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16888
// strided_cn: single run at m = 2, n = 4, k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
16902
// k_eq_8_subtile: for n = 1..4 and m = 1..2, runs the tester at k = 8 with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
16920
// k_eq_8_subtile_m: for m = 1..2, runs the tester at n = 4, k = 8 with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16936
// k_eq_8_subtile_n: for n = 1..4, runs the tester at m = 2, k = 8 with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16952
// k_lt_8: for k = 1..7, runs the tester at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
16967
// k_lt_8_subtile: for k = 1..7, n = 1..4 and m = 1..2, runs the tester with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
16987
// k_gt_8: for k = 9..15, runs the tester at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17002
// k_gt_8_subtile: for k = 9..15, n = 1..4 and m = 1..2, runs the tester with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17022
// k_div_8: for k = 16, 24, ..., 80, runs the tester at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17037
// k_div_8_subtile: for k = 16, 24, ..., 80, n = 1..4 and m = 1..2, runs the
// tester with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17057
// n_gt_4: for n = 5, 6, 7 and k = 1, 10, 19, 28, 37, runs the tester at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17074
// n_gt_4_strided_cn: for n = 5, 6, 7 and k = 1, 10, 19, 28, 37, runs the
// tester at m = 2 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17092
// n_gt_4_subtile: for n = 5, 6, 7, k = 1, 10, 19, 28, 37 and m = 1..2, runs
// the tester with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17112
// n_div_4: for n = 8, 12 and k = 1, 10, 19, 28, 37, runs the tester at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17129
// n_div_4_strided_cn: for n = 8, 12 and k = 1, 10, 19, 28, 37, runs the tester
// at m = 2 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17147
// n_div_4_subtile: for n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1..2, runs the
// tester with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17167
// small_kernel: for k = 1, 10, 19, 28, 37, runs the tester at m = 2, n = 4
// with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17183
// small_kernel_subtile: for k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..2, runs
// the tester with ks(3) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17204
// n_gt_4_small_kernel: for n = 5, 6, 7 and k = 1, 10, 19, 28, 37, runs the
// tester at m = 2 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17222
// n_div_4_small_kernel: for n = 8, 12 and k = 1, 10, 19, 28, 37, runs the
// tester at m = 2 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17240
// strided_cm_subtile: for k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..2, runs
// the tester with cm_stride(7) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17261
// a_offset: for k = 1, 10, 19, 28, 37, runs the tester at m = 2, n = 4 with
// ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17278
// zero: for k = 1, 10, 19, 28, 37 and zero_index = 0, 1, runs the tester at
// m = 2, n = 4 with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17298
// qmin: single run at m = 2, n = 4, k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17312
// qmax: single run at m = 2, n = 4, k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17326
// strided_cm: single run at m = 2, n = 4, k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17340 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17341
17342
17343 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// k_eq_8: single run with m and n equal to mr and nr (2 and 4) at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17356
// strided_cn: single run at m = 2, n = 4, k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(2).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17370
// k_eq_8_subtile: for n = 1..4 and m = 1..2, runs the tester at k = 8 with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17388
// k_eq_8_subtile_m: for m = 1..2, runs the tester at n = 4, k = 8 with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17404
// k_eq_8_subtile_n: for n = 1..4, runs the tester at m = 2, k = 8 with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17420
// k_lt_8: for k = 1..7, runs the tester at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17435
// k_lt_8_subtile: for k = 1..7, n = 1..4 and m = 1..2, runs the tester with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17455
// k_gt_8: for k = 9..15, runs the tester at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17470
// k_gt_8_subtile: for k = 9..15, n = 1..4 and m = 1..2, runs the tester with
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17490
// k_div_8: for k = 16, 24, ..., 80, runs the tester at m = 2, n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17505
// k_div_8_subtile: for k = 16, 24, ..., 80, n = 1..4 and m = 1..2, runs the
// tester with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17525
// n_gt_4: for n = 5, 6, 7 and k = 1, 10, 19, 28, 37, runs the tester at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17542
// n_gt_4_strided_cn: for n = 5, 6, 7 and k = 1, 10, 19, 28, 37, runs the
// tester at m = 2 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17560
// n_gt_4_subtile: for n = 5, 6, 7, k = 1, 10, 19, 28, 37 and m = 1..2, runs
// the tester with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17580
// n_div_4: for n = 8, 12 and k = 1, 10, 19, 28, 37, runs the tester at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17597
// n_div_4_strided_cn: for n = 8, 12 and k = 1, 10, 19, 28, 37, runs the tester
// at m = 2 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17615
// n_div_4_subtile: for n = 8, 12, k = 1, 10, 19, 28, 37 and m = 1..2, runs the
// tester with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17635
// small_kernel: for k = 1, 10, 19, 28, 37, runs the tester at m = 2, n = 4
// with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17651
// small_kernel_subtile: for k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..2, runs
// the tester with ks(3) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17672
// n_gt_4_small_kernel: for n = 5, 6, 7 and k = 1, 10, 19, 28, 37, runs the
// tester at m = 2 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17690
// n_div_4_small_kernel: for n = 8, 12 and k = 1, 10, 19, 28, 37, runs the
// tester at m = 2 with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17708
// strided_cm_subtile: for k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..2, runs
// the tester with cm_stride(7) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17729
// a_offset: for k = 1, 10, 19, 28, 37, runs the tester at m = 2, n = 4 with
// ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17746
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  // Same setup as the a_offset case, additionally sweeping zero_index over 0 and 1.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(2).nr(4).kr(2).sr(1);
      tester.m(2).n(4).k(k_val).ks(3);
      tester.a_offset(83).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17766
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  // Full 2x4 tile, k = 8, with qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17780
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  // Full 2x4 tile, k = 8, with qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17794
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  // Full 2x4 tile, k = 8, with cm_stride = 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(2).nr(4).kr(2).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17808 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
17809
17810
17811 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  // Single run on the full 3x4 tile with k = 8.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17824
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile, k = 8, with cn_stride = 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
17838
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // Every (m, n) with m <= 3 and n <= 4 at k = 8, one iteration each.
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(m_val).n(n_val).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
17856
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  // m swept over 1..3 with n fixed at 4 and k = 8, one iteration each.
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(m_val).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17872
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  // n swept over 1..4 with m fixed at 3 and k = 8, one iteration each.
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(n_val).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17888
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile for every k in 1..7.
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17903
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // Every (m, n) with m <= 3 and n <= 4, for every k in 1..7, one iteration each.
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17923
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile for every k in 9..15.
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17938
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // Every (m, n) with m <= 3 and n <= 4, for every k in 9..15, one iteration each.
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17958
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
17973
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // Every (m, n) with m <= 3 and n <= 4, for k = 16, 24, ..., 80, one iteration each.
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
17993
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  // n in {5, 6, 7} (above nr = 4) crossed with k in {1, 10, 19, 28, 37}.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18010
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // n in {5, 6, 7} crossed with k in {1, 10, 19, 28, 37}, with cn_stride = 7.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18028
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  // n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, and m in 1..3, one iteration each.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18048
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  // n in {8, 12} (multiples of nr = 4) crossed with k in {1, 10, 19, 28, 37}.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18065
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // n in {8, 12} crossed with k in {1, 10, 19, 28, 37}, with cn_stride = 7.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18083
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  // n in {8, 12}, k in {1, 10, 19, 28, 37}, and m in 1..3, one iteration each.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18103
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile with ks = 3, for k in {1, 10, 19, 28, 37}.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_val).ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18119
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  // Every (m, n) with m <= 3 and n <= 4, k in {1, 10, 19, 28, 37}, ks = 3, one iteration each.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val).ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18140
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  // n in {5, 6, 7} (above nr = 4) crossed with k in {1, 10, 19, 28, 37}, always with ks = 3.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18158
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  // n in {8, 12} (multiples of nr = 4) crossed with k in {1, 10, 19, 28, 37}, always with ks = 3.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18176
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  // Every (m, n) with m <= 3 and n <= 4, for k in {1, 10, 19, 28, 37}, using cm_stride = 7.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18197
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile with ks = 3 and a_offset = 127, for k in {1, 10, 19, 28, 37}.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_val).ks(3);
    tester.a_offset(127);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18214
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  // Same setup as the a_offset case, additionally sweeping zero_index over 0, 1 and 2.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(4).k(k_val).ks(3);
      tester.a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18234
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile, k = 8, with qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18248
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile, k = 8, with qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18262
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  // Full 3x4 tile, k = 8, with cm_stride = 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18276 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18277
18278
18279 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  // Single run on the full 3x4 tile with k = 8.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18292
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile, k = 8, with cn_stride = 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18306
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every (m, n) with m <= 3 and n <= 4 at k = 8, one iteration each.
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(m_val).n(n_val).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18324
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  // m swept over 1..3 with n fixed at 4 and k = 8, one iteration each.
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(m_val).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18340
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  // n swept over 1..4 with m fixed at 3 and k = 8, one iteration each.
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(n_val).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18356
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile for every k in 1..7.
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18371
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every (m, n) with m <= 3 and n <= 4, for every k in 1..7, one iteration each.
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18391
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile for every k in 9..15.
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18406
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every (m, n) with m <= 3 and n <= 4, for every k in 9..15, one iteration each.
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18426
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile for k = 16, 24, ..., 80 (multiples of 8).
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18441
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every (m, n) with m <= 3 and n <= 4, for k = 16, 24, ..., 80, one iteration each.
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18461
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  // n in {5, 6, 7} (above nr = 4) crossed with k in {1, 10, 19, 28, 37}.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18478
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // n in {5, 6, 7} crossed with k in {1, 10, 19, 28, 37}, with cn_stride = 7.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18496
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, and m in 1..3, one iteration each.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18516
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  // n in {8, 12} (multiples of nr = 4) crossed with k in {1, 10, 19, 28, 37}.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18533
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  // n in {8, 12} crossed with k in {1, 10, 19, 28, 37}, with cn_stride = 7.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18551
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  // n in {8, 12}, k in {1, 10, 19, 28, 37}, and m in 1..3, one iteration each.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18571
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with ks = 3, for k in {1, 10, 19, 28, 37}.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_val).ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18587
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every (m, n) with m <= 3 and n <= 4, k in {1, 10, 19, 28, 37}, ks = 3, one iteration each.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val).ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18608
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // n in {5, 6, 7} (above nr = 4) crossed with k in {1, 10, 19, 28, 37}, always with ks = 3.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18626
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  // n in {8, 12} (multiples of nr = 4) crossed with k in {1, 10, 19, 28, 37}, always with ks = 3.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18644
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  // Every (m, n) with m <= 3 and n <= 4, for k in {1, 10, 19, 28, 37}, using cm_stride = 7.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18665
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile with ks = 3 and a_offset = 127, for k in {1, 10, 19, 28, 37}.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(1);
    tester.m(3).n(4).k(k_val).ks(3);
    tester.a_offset(127);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18682
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  // Same setup as the a_offset case, additionally sweeping zero_index over 0, 1 and 2.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(1);
      tester.m(3).n(4).k(k_val).ks(3);
      tester.a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18702
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile, k = 8, with qmin set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18716
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile, k = 8, with qmax set to 128.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18730
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  // Full 3x4 tile, k = 8, with cm_stride = 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18744 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
18745
18746
18747 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  // Single run on the full 4x4 tile with k = 8.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18760
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  // Full 4x4 tile, k = 8, with cn_stride = 7.
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(1);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
18774
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  // Every (m, n) with m <= 4 and n <= 4 at k = 8, one iteration each.
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(1);
      tester.m(m_val).n(n_val).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18792
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  // m swept over 1..4 with n fixed at 4 and k = 8, one iteration each.
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(m_val).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18808
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  // n swept over 1..4 with m fixed at 4 and k = 8, one iteration each.
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(1);
    tester.m(4).n(n_val).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18824
// Runs the full 4x4 tile for each k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18839
// For each k in [1, 7], sweeps every n in [1, 4] and m in [1, 4], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18859
// Runs the full 4x4 tile for each k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18874
// For each k in [9, 15], sweeps every n in [1, 4] and m in [1, 4], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18894
// Runs the full 4x4 tile for k = 16, 24, ..., 80 (step of 8).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
18909
// For k = 16, 24, ..., 80, sweeps every n in [1, 4] and m in [1, 4], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18929
// Runs m = 4 with n in [5, 7] and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18946
// Runs m = 4 with n in [5, 7] and k = 1, 10, 19, 28, 37, using cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
18964
// For n in [5, 7] and k = 1, 10, 19, 28, 37, sweeps m over [1, 4], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
18984
// Runs m = 4 with n = 8 and n = 12, each at k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19001
// Runs m = 4 with n = 8 and n = 12, each at k = 1, 10, 19, 28, 37, using cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19019
// For n = 8 and n = 12 and k = 1, 10, 19, 28, 37, sweeps m over [1, 4], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19039
// Runs the full 4x4 tile with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k)
        .ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19055
// With ks(3) and k = 1, 10, 19, 28, 37, sweeps every n in [1, 4] and m in [1, 4], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19076
// Runs m = 4 with ks(3) for n in [5, 7] and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k)
          .ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19094
// Runs m = 4 with ks(3) for n = 8 and n = 12, each at k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k)
          .ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19112
// With cm_stride(7) and k = 1, 10, 19, 28, 37, sweeps every n in [1, 4] and m in [1, 4], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19133
// Runs the full 4x4 tile with ks(3) and a_offset(163) for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k)
        .ks(3)
        .a_offset(163);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19150
// With ks(3) and a_offset(163), tries zero_index values 0..3 for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; ++mz) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(k)
          .ks(3)
          .a_offset(163)
          .zero_index(mz);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19170
// Full 4x4 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19184
// Full 4x4 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19198
// Full 4x4 tile at k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19212 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19213
19214
19215 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the full 4x4 tile (m = 4, n = 4) with k fixed at 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19228
// Full 4x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19242
// Sweeps every n in [1, 4] and m in [1, 4] at k = 8, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(m).n(n).k(8)
          .iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19260
// Sweeps m over [1, 4] with n = 4 and k = 8, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 4; ++m) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(m).n(4).k(8)
        .iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19276
// Sweeps n over [1, 4] with m = 4 and k = 8, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; ++n) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(n).k(8)
        .iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19292
// Runs the full 4x4 tile for each k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19307
// For each k in [1, 7], sweeps every n in [1, 4] and m in [1, 4], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19327
// Runs the full 4x4 tile for each k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19342
// For each k in [9, 15], sweeps every n in [1, 4] and m in [1, 4], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19362
// Runs the full 4x4 tile for k = 16, 24, ..., 80 (step of 8).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19377
// For k = 16, 24, ..., 80, sweeps every n in [1, 4] and m in [1, 4], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19397
// Runs m = 4 with n in [5, 7] and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19414
// Runs m = 4 with n in [5, 7] and k = 1, 10, 19, 28, 37, using cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19432
// For n in [5, 7] and k = 1, 10, 19, 28, 37, sweeps m over [1, 4], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19452
// Runs m = 4 with n = 8 and n = 12, each at k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19469
// Runs m = 4 with n = 8 and n = 12, each at k = 1, 10, 19, 28, 37, using cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19487
// For n = 8 and n = 12 and k = 1, 10, 19, 28, 37, sweeps m over [1, 4], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19507
// Runs the full 4x4 tile with ks(3) for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k)
        .ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19523
// With ks(3) and k = 1, 10, 19, 28, 37, sweeps every n in [1, 4] and m in [1, 4], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19544
// Runs m = 4 with ks(3) for n in [5, 7] and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k)
          .ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19562
// Runs m = 4 with ks(3) for n = 8 and n = 12, each at k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(n).k(k)
          .ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19580
// With cm_stride(7) and k = 1, 10, 19, 28, 37, sweeps every n in [1, 4] and m in [1, 4], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19601
// Runs the full 4x4 tile with ks(3) and a_offset(163) for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(k)
        .ks(3)
        .a_offset(163);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
19618
// With ks(3) and a_offset(163), tries zero_index values 0..3 for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; ++mz) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(k)
          .ks(3)
          .a_offset(163)
          .zero_index(mz);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19638
// Full 4x4 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19652
// Full 4x4 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19666
// Full 4x4 tile at k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
19680 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
19681
19682
19683 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the full 1x4 tile (m = 1, n = 4) with k fixed at 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
19696
// Full 1x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
19710
// Sweeps n over [1, 4] at k = 8; the m loop covers only m = 1. One iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 1; ++m) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m).n(n).k(8)
          .iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19728
// Loops m over [1, 1] (a single value) with n = 4 and k = 8, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m = 1; m <= 1; ++m) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(m).n(4).k(8)
        .iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
19744
// Sweeps n over [1, 4] with m = 1 and k = 8, one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; ++n) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n).k(8)
        .iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
19760
// Runs the full 1x4 tile for each k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
19775
// For each k in [1, 7], sweeps n over [1, 4]; the m loop covers only m = 1. One iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19795
// Runs the full 1x4 tile for each k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; ++k) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
19810
// For each k in [9, 15], sweeps n over [1, 4]; the m loop covers only m = 1. One iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19830
// Runs the full 1x4 tile for k = 16, 24, ..., 80 (step of 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
19845
// For k = 16, 24, ..., 80, sweeps n over [1, 4]; the m loop covers only m = 1. One iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m).n(n).k(k)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
19865
// Runs m = 1 with n in [5, 7] and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n).k(k);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
19882
// Covers n = 5, 6, 7 with m=1 and cn_stride=7, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19900
// Covers n = 5, 6, 7 across k = 1, 10, 19, 28, 37 and m in 1..1,
// running a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
19920
// Covers n = 8 and 12 (multiples of nr=4) with m=1, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19937
// Covers n = 8 and 12 with m=1 and cn_stride=7, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
19955
// Covers n = 8 and 12 across k = 1, 10, 19, 28, 37 and m in 1..1,
// running a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
19975
// Runs with ks=3 at m=1, n=4, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
19991
// Runs with ks=3 across k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..1,
// using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20012
// Runs with ks=3 for n = 5, 6, 7 at m=1, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20030
// Runs with ks=3 for n = 8 and 12 at m=1, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20048
// Runs with cm_stride=7 across k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..1,
// using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20069
// Runs with ks=3 and a_offset=43 at m=1, n=4, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20086
// Runs with ks=3 and a_offset=43 at m=1, n=4 for k = 1, 10, 19, 28, 37,
// setting zero_index to each value in 0..0.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(4).k(k_dim)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20106
// Single run at m=1, n=4, k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20120
// Single run at m=1, n=4, k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20134
// Single run at m=1, n=4, k=8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20148 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20149
20150
20151 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at k=8 with m=1 and n=4 (equal to the configured mr and nr).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20164
// Single run at m=1, n=4, k=8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20178
// At k=8, covers every n in 1..4 and m in 1..1, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20196
// At k=8 and n=4, covers every m in 1..1, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20212
// At k=8 and m=1, covers every n in 1..4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20228
// Sweeps k over 1..7 with m=1 and n=4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20243
// Sweeps k over 1..7 and, for each k, every n in 1..4 and m in 1..1,
// running a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20263
// Sweeps k over 9..15 with m=1 and n=4.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20278
// Sweeps k over 9..15 and, for each k, every n in 1..4 and m in 1..1,
// running a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20298
// Sweeps k over 16, 24, ..., 80 with m=1 and n=4 (equal to the configured mr and nr).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20313
// Sweeps k over 16, 24, ..., 80 and, for each k, every n in 1..4 and m in 1..1,
// running a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20333
// Covers n = 5, 6, 7 (above nr=4) with m=1, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20350
// Covers n = 5, 6, 7 with m=1 and cn_stride=7, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20368
// Covers n = 5, 6, 7 across k = 1, 10, 19, 28, 37 and m in 1..1,
// running a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20388
// Covers n = 8 and 12 (multiples of nr=4) with m=1, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20405
// Covers n = 8 and 12 with m=1 and cn_stride=7, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20423
// Covers n = 8 and 12 across k = 1, 10, 19, 28, 37 and m in 1..1,
// running a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20443
// Runs with ks=3 at m=1, n=4, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20459
// Runs with ks=3 across k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..1,
// using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20480
// Runs with ks=3 for n = 5, 6, 7 at m=1, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20498
// Runs with ks=3 for n = 8 and 12 at m=1, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20516
// Runs with cm_stride=7 across k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..1,
// using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20537
// Runs with ks=3 and a_offset=43 at m=1, n=4, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(1)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
20554
// Runs with ks=3 and a_offset=43 at m=1, n=4 for k = 1, 10, 19, 28, 37,
// setting zero_index to each value in 0..0.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(1)
          .m(1).n(4).k(k_dim)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20574
// Single run at m=1, n=4, k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20588
// Single run at m=1, n=4, k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20602
// Single run at m=1, n=4, k=8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
20616 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
20617
20618
20619 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at k=8 with m=2 and n=4 (equal to the configured mr and nr).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20632
// Single run at m=2, n=4, k=8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
20646
// At k=8, covers every n in 1..4 and m in 1..2, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20664
// At k=8 and n=4, covers every m in 1..2, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20680
// At k=8 and m=2, covers every n in 1..4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20696
// Sweeps k over 1..7 with m=2 and n=4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20711
// Sweeps k over 1..7 and, for each k, every n in 1..4 and m in 1..2,
// running a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20731
// Sweeps k over 9..15 with m=2 and n=4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20746
// Sweeps k over 9..15 and, for each k, every n in 1..4 and m in 1..2,
// running a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20766
// Sweeps k over 16, 24, ..., 80 with m=2 and n=4 (equal to the configured mr and nr).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20781
// Sweeps k over 16, 24, ..., 80 and, for each k, every n in 1..4 and m in 1..2,
// running a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20801
// Covers n = 5, 6, 7 (above nr=4) with m=2, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20818
// Covers n = 5, 6, 7 with m=2 and cn_stride=7, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20836
// Covers n = 5, 6, 7 across k = 1, 10, 19, 28, 37 and m in 1..2,
// running a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20856
// Covers n = 8 and 12 (multiples of nr=4) with m=2, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20873
// Covers n = 8 and 12 with m=2 and cn_stride=7, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
20891
// Covers n = 8 and 12 across k = 1, 10, 19, 28, 37 and m in 1..2,
// running a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20911
// Runs with ks=3 at m=2, n=4, sweeping k over 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
20927
// Runs with ks=3 across k = 1, 10, 19, 28, 37, n in 1..4 and m in 1..2,
// using a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
20948
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  // Sweep: n in 5..7, k in {1, 10, 19, 28, 37}; full m = 2 with ks = 3.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
20966
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, n_div_4_small_kernel) {
  // Sweep: n in {8, 12}, k in {1, 10, 19, 28, 37}; full m = 2 with ks = 3.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
20984
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm_subtile) {
  // Sweep: k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..2; cm_stride = 7,
  // one iteration per configuration.
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21005
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, a_offset) {
  // Sweep: k in {1, 10, 19, 28, 37}; full 2x4 tile with ks = 3 and
  // a_offset = 83.
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21022
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, zero) {
  // Sweep: k in {1, 10, 19, 28, 37}, zero_index in 0..1; full 2x4 tile with
  // ks = 3 and a_offset = 83.
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(4).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21042
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmin) {
  // Single configuration: full 2x4 tile, k = 8, qmin = 128.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
21056
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, qmax) {
  // Single configuration: full 2x4 tile, k = 8, qmax = 128.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
21070
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE2_LD128, strided_cm) {
  // Single configuration: full 2x4 tile, k = 8, cm_stride = 7.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
21084 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21085
21086
21087 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8) {
  // Single configuration: full 2x4 tile, k = 8.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21100
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cn) {
  // Single configuration: full 2x4 tile, k = 8, cn_stride = 7.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21114
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile) {
  // Sweep: n in 1..4, m in 1..2; k = 8, one iteration per configuration.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21132
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile_m) {
  // Sweep: m in 1..2; n = 4, k = 8, one iteration per configuration.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21148
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_eq_8_subtile_n) {
  // Sweep: n in 1..4; m = 2, k = 8, one iteration per configuration.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21164
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_lt_8) {
  // Sweep: k in 1..7; full 2x4 tile.
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21179
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_lt_8_subtile) {
  // Sweep: k in 1..7, n in 1..4, m in 1..2; one iteration per configuration.
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21199
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_gt_8) {
  // Sweep: k in 9..15; full 2x4 tile.
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21214
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_gt_8_subtile) {
  // Sweep: k in 9..15, n in 1..4, m in 1..2; one iteration per configuration.
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21234
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_div_8) {
  // Sweep: k in {16, 24, ..., 80}; full 2x4 tile.
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21249
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, k_div_8_subtile) {
  // Sweep: k in {16, 24, ..., 80}, n in 1..4, m in 1..2; one iteration per
  // configuration.
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21269
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4) {
  // Sweep: n in 5..7, k in {1, 10, 19, 28, 37}; full m = 2.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21286
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_strided_cn) {
  // Sweep: n in 5..7, k in {1, 10, 19, 28, 37}; full m = 2, cn_stride = 7.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21304
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_subtile) {
  // Sweep: n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..2; one iteration per
  // configuration.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21324
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4) {
  // Sweep: n in {8, 12}, k in {1, 10, 19, 28, 37}; full m = 2.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21341
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_strided_cn) {
  // Sweep: n in {8, 12}, k in {1, 10, 19, 28, 37}; full m = 2, cn_stride = 7.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21359
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_subtile) {
  // Sweep: n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..2; one iteration
  // per configuration.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21379
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, small_kernel) {
  // Sweep: k in {1, 10, 19, 28, 37}; full 2x4 tile with ks = 3.
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21395
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, small_kernel_subtile) {
  // Sweep: k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..2; ks = 3, one
  // iteration per configuration.
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21416
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_gt_4_small_kernel) {
  // Sweep: n in 5..7, k in {1, 10, 19, 28, 37}; full m = 2 with ks = 3.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21434
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, n_div_4_small_kernel) {
  // Sweep: n in {8, 12}, k in {1, 10, 19, 28, 37}; full m = 2 with ks = 3.
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21452
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cm_subtile) {
  // Sweep: k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..2; cm_stride = 7,
  // one iteration per configuration.
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21473
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, a_offset) {
  // Sweep: k in {1, 10, 19, 28, 37}; full 2x4 tile with ks = 3 and
  // a_offset = 83.
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
21490
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, zero) {
  // Sweep: k in {1, 10, 19, 28, 37}, zero_index in 0..1; full 2x4 tile with
  // ks = 3 and a_offset = 83.
  TEST_REQUIRES_X86_SSE41;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(2).n(4).k(kc)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21510
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, qmin) {
  // Single configuration: full 2x4 tile, k = 8, qmin = 128.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21524
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, qmax) {
  // Single configuration: full 2x4 tile, k = 8, qmax = 128.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21538
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__SSE41_LD128, strided_cm) {
  // Single configuration: full 2x4 tile, k = 8, cm_stride = 7.
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
21552 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
21553
21554
21555 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8) {
  // Single configuration: full 4x4 tile, k = 8.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
21568
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cn) {
  // Single configuration: full 4x4 tile, k = 8, cn_stride = 7.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
21582
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile) {
  // Sweep: n in 1..4, m in 1..4; k = 8, one iteration per configuration.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(8)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21600
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_m) {
  // Sweep: m in 1..4; n = 4, k = 8, one iteration per configuration.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(mc).n(4).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21616
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_eq_8_subtile_n) {
  // Sweep: n in 1..4; m = 4, k = 8, one iteration per configuration.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(nc).k(8)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21632
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8) {
  // Sweep: k in 1..7; full 4x4 tile.
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21647
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_lt_8_subtile) {
  // Sweep: k in 1..7, n in 1..4, m in 1..4; one iteration per configuration.
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21667
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8) {
  // Sweep: k in 9..15; full 4x4 tile.
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21682
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_gt_8_subtile) {
  // Sweep: k in 9..15, n in 1..4, m in 1..4; one iteration per configuration.
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21702
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8) {
  // Sweep: k in {16, 24, ..., 80}; full 4x4 tile.
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21717
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, k_div_8_subtile) {
  // Sweep: k in {16, 24, ..., 80}, n in 1..4, m in 1..4; one iteration per
  // configuration.
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21737
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4) {
  // Sweep: n in 5..7, k in {1, 10, 19, 28, 37}; full m = 4.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21754
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_strided_cn) {
  // Sweep: n in 5..7, k in {1, 10, 19, 28, 37}; full m = 4, cn_stride = 7.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21772
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_subtile) {
  // Sweep: n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..4; one iteration per
  // configuration.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21792
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4) {
  // Sweep: n in {8, 12}, k in {1, 10, 19, 28, 37}; full m = 4.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21809
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_strided_cn) {
  // Sweep: n in {8, 12}, k in {1, 10, 19, 28, 37}; full m = 4, cn_stride = 7.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21827
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_subtile) {
  // Sweep: n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..4; one iteration
  // per configuration.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21847
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, small_kernel) {
  // Sweep: k in {1, 10, 19, 28, 37}; full 4x4 tile with ks = 3.
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21863
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, small_kernel_subtile) {
  // Sweep: k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..4; ks = 3, one
  // iteration per configuration.
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21884
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_gt_4_small_kernel) {
  // Sweep: n in 5..7, k in {1, 10, 19, 28, 37}; full m = 4 with ks = 3.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21902
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, n_div_4_small_kernel) {
  // Sweep: n in {8, 12}, k in {1, 10, 19, 28, 37}; full m = 4 with ks = 3.
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21920
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm_subtile) {
  // Sweep: k in {1, 10, 19, 28, 37}, n in 1..4, m in 1..4; cm_stride = 7,
  // one iteration per configuration.
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(4).kr(2).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
21941
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, a_offset) {
  // Sweep: k in {1, 10, 19, 28, 37}; full 4x4 tile with ks = 3 and
  // a_offset = 163.
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(4).kr(2).sr(1)
        .m(4).n(4).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
21958
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, zero) {
  // Sweep: k in {1, 10, 19, 28, 37}, zero_index in 0..3; full 4x4 tile with
  // ks = 3 and a_offset = 163.
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(4).kr(2).sr(1)
          .m(4).n(4).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
21978
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmin) {
  // Single configuration: full 4x4 tile, k = 8, qmin = 128.
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
      .mr(4).nr(4).kr(2).sr(1)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
21992
// Full 4x4 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
22006
// Full 4x4 tile at k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
22020 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22021
22022
22023 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 1x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22036
// Full 1x4 tile at k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22050
// k = 8 over every (m, n) with m in [1, 1], n in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22068
// k = 8, n = 4, with m swept over [1, 1]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22084
// k = 8, m = 1, with n swept over [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22100
// Full 1x4 tile for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22115
// Every k in [1, 7] over every (m, n) with m in [1, 1], n in [1, 4]; one
// iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22135
// Full 1x4 tile for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22150
// Every k in [9, 15] over every (m, n) with m in [1, 1], n in [1, 4]; one
// iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22170
// Full 1x4 tile for k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22185
// k = 16, 24, ..., 80 over every (m, n) with m in [1, 1], n in [1, 4]; one
// iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22205
// n in [5, 7] with m = 1, for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22222
// n in [5, 7] with m = 1 and cn_stride(7), for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22240
// n in [5, 7], k = 1, 10, ..., 37, and m in [1, 1]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22260
// n = 8 and 12 with m = 1, for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22277
// n = 8 and 12 with m = 1 and cn_stride(7), for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22295
// n = 8 and 12, k = 1, 10, ..., 37, and m in [1, 1]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22315
// Full 1x4 tile with ks(3), for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22331
// ks(3) with k = 1, 10, ..., 37 over every (m, n) with m in [1, 1],
// n in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22352
// n in [5, 7] with m = 1 and ks(3), for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22370
// n = 8 and 12 with m = 1 and ks(3), for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22388
// cm_stride(7) with k = 1, 10, ..., 37 over every (m, n) with m in [1, 1],
// n in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22409
// Full 1x4 tile with ks(3) and a_offset(43), for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(1)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22426
// Full 1x4 tile with ks(3) and a_offset(43); zero_index takes the single
// value 0 for each k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(1)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22446
// Full 1x4 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22460
// Full 1x4 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22474
// Full 1x4 tile at k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(1)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22488 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22489
22490
22491 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 2x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22504
// Full 2x4 tile at k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22518
// k = 8 over every (m, n) with m in [1, 2], n in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22536
// k = 8, n = 4, with m swept over [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22552
// k = 8, m = 2, with n swept over [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22568
// Full 2x4 tile for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22583
// Every k in [1, 7] over every (m, n) with m in [1, 2], n in [1, 4]; one
// iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22603
// Full 2x4 tile for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22618
// Every k in [9, 15] over every (m, n) with m in [1, 2], n in [1, 4]; one
// iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22638
// Full 2x4 tile for k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22653
// k = 16, 24, ..., 80 over every (m, n) with m in [1, 2], n in [1, 4]; one
// iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22673
// n in [5, 7] with m = 2, for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22690
// n in [5, 7] with m = 2 and cn_stride(7), for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22708
// n in [5, 7], k = 1, 10, ..., 37, and m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22728
// n = 8 and 12 with m = 2, for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22745
// n = 8 and 12 with m = 2 and cn_stride(7), for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22763
// n = 8 and 12, k = 1, 10, ..., 37, and m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22783
// Full 2x4 tile with ks(3), for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22799
// ks(3) with k = 1, 10, ..., 37 over every (m, n) with m in [1, 2],
// n in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22820
// n in [5, 7] with m = 2 and ks(3), for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22838
// n = 8 and 12 with m = 2 and ks(3), for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22856
// cm_stride(7) with k = 1, 10, ..., 37 over every (m, n) with m in [1, 2],
// n in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
22877
// Full 2x4 tile with ks(3) and a_offset(83), for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(2)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
22894
// Full 2x4 tile with ks(3) and a_offset(83); zero_index is swept over 0..1
// for each k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(2)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
22914
// Full 2x4 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22928
// Full 2x4 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22942
// Full 2x4 tile at k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(2)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22956 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
22957
22958
22959 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22972
// Full 3x4 tile at k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
22986
// k = 8 over every (m, n) with m in [1, 3], n in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23004
// k = 8, n = 4, with m swept over [1, 3]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23020
// k = 8, m = 3, with n swept over [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23036
// Full 3x4 tile for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23051
// QS8 IGEMM 3x4c2 AVX (ld128): k in [1, 7] crossed with every sub-tile
// (n in [1, 4], m in [1, 3]), with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23071
// QS8 IGEMM 3x4c2 AVX (ld128): full 3x4 tile with k swept over [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23086
// QS8 IGEMM 3x4c2 AVX (ld128): k in [9, 15] crossed with every sub-tile
// (n in [1, 4], m in [1, 3]), with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23106
// QS8 IGEMM 3x4c2 AVX (ld128): full 3x4 tile with k = 16, 24, ..., 80
// (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23121
// QS8 IGEMM 3x4c2 AVX (ld128): k = 16, 24, ..., 80 crossed with every
// sub-tile (n in [1, 4], m in [1, 3]), with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23141
// QS8 IGEMM 3x4c2 AVX (ld128): n in [5, 7] (above nr = 4) with m = 3 and
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23158
// QS8 IGEMM 3x4c2 AVX (ld128): n in [5, 7] with m = 3, k = 1, 10, ..., 37,
// and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23176
// QS8 IGEMM 3x4c2 AVX (ld128): n in [5, 7], k = 1, 10, ..., 37, and m swept
// over [1, 3], with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23196
// QS8 IGEMM 3x4c2 AVX (ld128): n = 8 and 12 (multiples of nr = 4) with m = 3
// and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23213
// QS8 IGEMM 3x4c2 AVX (ld128): n = 8 and 12 with m = 3, k = 1, 10, ..., 37,
// and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23231
// QS8 IGEMM 3x4c2 AVX (ld128): n = 8 and 12, k = 1, 10, ..., 37, and m swept
// over [1, 3], with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23251
// QS8 IGEMM 3x4c2 AVX (ld128): full 3x4 tile with ks set to 3 and
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23267
// QS8 IGEMM 3x4c2 AVX (ld128): ks set to 3, k = 1, 10, ..., 37, crossed with
// every sub-tile (n in [1, 4], m in [1, 3]), with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23288
// QS8 IGEMM 3x4c2 AVX (ld128): n in [5, 7] with m = 3, ks set to 3, and
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23306
// QS8 IGEMM 3x4c2 AVX (ld128): n = 8 and 12 with m = 3, ks set to 3, and
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23324
// QS8 IGEMM 3x4c2 AVX (ld128): cm_stride set to 7, k = 1, 10, ..., 37,
// crossed with every sub-tile (n in [1, 4], m in [1, 3]), with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23345
// QS8 IGEMM 3x4c2 AVX (ld128): full 3x4 tile with ks set to 3, a_offset set
// to 127, and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(3)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23362
// QS8 IGEMM 3x4c2 AVX (ld128): full 3x4 tile with ks = 3 and a_offset = 127,
// sweeping zero_index over [0, 2] for each k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(3)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(127)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23382
// QS8 IGEMM 3x4c2 AVX (ld128): full 3x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23396
// QS8 IGEMM 3x4c2 AVX (ld128): full 3x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23410
// QS8 IGEMM 3x4c2 AVX (ld128): full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(3)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(3)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23424 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23425
23426
23427 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// QS8 IGEMM 4x4c2 XOP (ld128): full 4x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23440
// QS8 IGEMM 4x4c2 XOP (ld128): full 4x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23454
// QS8 IGEMM 4x4c2 XOP (ld128): k = 8 over every sub-tile, n in [1, 4] and
// m in [1, 4], with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23472
// QS8 IGEMM 4x4c2 XOP (ld128): k = 8, n fixed at 4, m swept over [1, 4],
// with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23488
// QS8 IGEMM 4x4c2 XOP (ld128): k = 8, m fixed at 4, n swept over [1, 4],
// with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23504
// QS8 IGEMM 4x4c2 XOP (ld128): full 4x4 tile with k swept over [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23519
// QS8 IGEMM 4x4c2 XOP (ld128): k in [1, 7] crossed with every sub-tile
// (n in [1, 4], m in [1, 4]), with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23539
// QS8 IGEMM 4x4c2 XOP (ld128): full 4x4 tile with k swept over [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23554
// QS8 IGEMM 4x4c2 XOP (ld128): k in [9, 15] crossed with every sub-tile
// (n in [1, 4], m in [1, 4]), with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23574
// QS8 IGEMM 4x4c2 XOP (ld128): full 4x4 tile with k = 16, 24, ..., 80
// (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23589
// QS8 IGEMM 4x4c2 XOP (ld128): k = 16, 24, ..., 80 crossed with every
// sub-tile (n in [1, 4], m in [1, 4]), with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23609
// QS8 IGEMM 4x4c2 XOP (ld128): n in [5, 7] (above nr = 4) with m = 4 and
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23626
// QS8 IGEMM 4x4c2 XOP (ld128): n in [5, 7] with m = 4, k = 1, 10, ..., 37,
// and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23644
// QS8 IGEMM 4x4c2 XOP (ld128): n in [5, 7], k = 1, 10, ..., 37, and m swept
// over [1, 4], with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23664
// QS8 IGEMM 4x4c2 XOP (ld128): n = 8 and 12 (multiples of nr = 4) with m = 4
// and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23681
// QS8 IGEMM 4x4c2 XOP (ld128): n = 8 and 12 with m = 4, k = 1, 10, ..., 37,
// and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23699
// QS8 IGEMM 4x4c2 XOP (ld128): n = 8 and 12, k = 1, 10, ..., 37, and m swept
// over [1, 4], with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23719
// QS8 IGEMM 4x4c2 XOP (ld128): full 4x4 tile with ks set to 3 and
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23735
// QS8 IGEMM 4x4c2 XOP (ld128): ks set to 3, k = 1, 10, ..., 37, crossed with
// every sub-tile (n in [1, 4], m in [1, 4]), with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23756
// QS8 IGEMM 4x4c2 XOP (ld128): n in [5, 7] with m = 4, ks set to 3, and
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23774
// QS8 IGEMM 4x4c2 XOP (ld128): n = 8 and 12 with m = 4, ks set to 3, and
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23792
// QS8 IGEMM 4x4c2 XOP (ld128): cm_stride set to 7, k = 1, 10, ..., 37,
// crossed with every sub-tile (n in [1, 4], m in [1, 4]), with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
23813
// QS8 IGEMM 4x4c2 XOP (ld128): full 4x4 tile with ks set to 3, a_offset set
// to 163, and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(2)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
23830
// QS8 IGEMM 4x4c2 XOP (ld128): full 4x4 tile with ks = 3 and a_offset = 163,
// sweeping zero_index over [0, 3] for each k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(2)
        .sr(1)
        .m(4)
        .n(4)
        .k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
23850
// QS8 IGEMM 4x4c2 XOP (ld128): full 4x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23864
// QS8 IGEMM 4x4c2 XOP (ld128): full 4x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23878
// QS8 IGEMM 4x4c2 XOP (ld128): full 4x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(2)
    .sr(1)
    .m(4)
    .n(4)
    .k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
23892 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
23893
23894
23895 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// QS8 IGEMM 1x4c2s4 SSE2 (ld64): full 1x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
23908
// QS8 IGEMM 1x4c2s4 SSE2 (ld64): full 1x4 tile at k = 8 with cn_stride set
// to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1)
    .nr(4)
    .kr(2)
    .sr(4)
    .m(1)
    .n(4)
    .k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
}
23922
// QS8 IGEMM 1x4c2s4 SSE2 (ld64): k = 8 over every sub-tile, n in [1, 4] and
// m in [1, 1] (a single value since mr = 1), with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
23940
// QS8 IGEMM 1x4c2s4 SSE2 (ld64): k = 8, n fixed at 4, m swept over [1, 1]
// (a single value since mr = 1), with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(m)
      .n(4)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
23956
// QS8 IGEMM 1x4c2s4 SSE2 (ld64): k = 8, m fixed at 1, n swept over [1, 4],
// with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
23972
// QS8 IGEMM 1x4c2s4 SSE2 (ld64): full 1x4 tile with k swept over [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
23987
// QS8 IGEMM 1x4c2s4 SSE2 (ld64): k in [1, 7] crossed with every sub-tile
// (n in [1, 4], m in [1, 1]), with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24007
// QS8 IGEMM 1x4c2s4 SSE2 (ld64): full 1x4 tile with k swept over [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
24022
// QS8 IGEMM 1x4c2s4 SSE2 (ld64): k in [9, 15] crossed with every sub-tile
// (n in [1, 4], m in [1, 1]), with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24042
// QS8 IGEMM 1x4c2s4 SSE2 (ld64): full 1x4 tile with k = 16, 24, ..., 80
// (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(4)
      .kr(2)
      .sr(4)
      .m(1)
      .n(4)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
  }
}
24057
// QS8 IGEMM 1x4c2s4 SSE2 (ld64): k = 16, 24, ..., 80 crossed with every
// sub-tile (n in [1, 4], m in [1, 1]), with iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(4)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
24077
// QS8 IGEMM 1x4c2s4 SSE2 (ld64): n in [5, 7] (above nr = 4) with m = 1 and
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
24094
// QS8 IGEMM 1x4c2s4 SSE2 (ld64): n in [5, 7] with m = 1, k = 1, 10, ..., 37,
// and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(2)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64, xnn_init_qs8_conv_minmax_fp32_sse2_params, xnn_qs8_requantize_fp32);
    }
  }
}
24112
// n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, m in [1, 1]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24132
// m=1 with n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nn).k(kk);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24149
// m=1 with n in {8, 12}, k in {1, 10, 19, 28, 37} and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nn).k(kk);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24167
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in [1, 1]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24187
// m=1, n=4, ks=3 with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kk);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24203
// ks=3 with k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 1]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mm).n(nn).k(kk);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24224
// m=1, ks=3 with n in {5, 6, 7} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nn).k(kk);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24242
// m=1, ks=3 with n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(nn).k(kk);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24260
// cm_stride=7 with k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 1]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(mm).n(nn).k(kk);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24281
// m=1, n=4, ks=3, a_offset=43 with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(kk);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24298
// m=1, n=4, ks=3, a_offset=43; k in {1, 10, 19, 28, 37} and zero_index in [0, 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(4).k(kk);
      tester.ks(3).a_offset(43).zero_index(zi);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24318
// Single run: m=1, n=4, k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24332
// Single run: m=1, n=4, k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24346
// Single run: m=1, n=4, k=8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24360 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24361
24362
24363 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run: m=2, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24376
// Single run: m=2, n=4, k=8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24390
// k=8 with every n in [1, 4] and m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 2; ++mm) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(mm).n(nn).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24408
// n=4, k=8 with m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mm = 1; mm <= 2; ++mm) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(mm).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24424
// m=2, k=8 with n in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(nn).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24440
// m=2, n=4 with k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kk);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24455
// k in [1, 7], n in [1, 4], m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24475
// m=2, n=4 with k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kk);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24490
// k in [9, 15], n in [1, 4], m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24510
// m=2, n=4 with k swept over 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kk);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24525
// k swept over 16, 24, ..., 80; every n in [1, 4] and m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24545
// m=2 with n in {5, 6, 7} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nn).k(kk);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24562
// m=2 with n in {5, 6, 7}, k in {1, 10, 19, 28, 37} and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nn).k(kk);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24580
// n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24600
// m=2 with n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nn).k(kk);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24617
// m=2 with n in {8, 12}, k in {1, 10, 19, 28, 37} and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nn).k(kk);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24635
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24655
// m=2, n=4, ks=3 with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kk);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24671
// ks=3 with k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mm).n(nn).k(kk);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24692
// m=2, ks=3 with n in {5, 6, 7} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nn).k(kk);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24710
// m=2, ks=3 with n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nn).k(kk);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24728
// cm_stride=7 with k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mm).n(nn).k(kk);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24749
// m=2, n=4, ks=3, a_offset=83 with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kk);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
24766
// m=2, n=4, ks=3, a_offset=83; k in {1, 10, 19, 28, 37} and zero_index in {0, 1}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(4).k(kk);
      tester.ks(3).a_offset(83).zero_index(zi);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24786
// Single run: m=2, n=4, k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24800
// Single run: m=2, n=4, k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24814
// Single run: m=2, n=4, k=8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
24828 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
24829
24830
24831 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run: m=2, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24844
// Single run: m=2, n=4, k=8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(2).sr(4);
  tester.m(2).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
24858
// k=8 with every n in [1, 4] and m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    for (uint32_t mm = 1; mm <= 2; ++mm) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(mm).n(nn).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
24876
// n=4, k=8 with m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t mm = 1; mm <= 2; ++mm) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(mm).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24892
// m=2, k=8 with n in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nn = 1; nn <= 4; ++nn) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(nn).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24908
// m=2, n=4 with k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kk);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24923
// k in [1, 7], n in [1, 4], m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24943
// m=2, n=4 with k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kk);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24958
// k in [9, 15], n in [1, 4], m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
24978
// m=2, n=4 with k swept over 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kk);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
24993
// k swept over 16, 24, ..., 80; every n in [1, 4] and m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25013
// m=2 with n in {5, 6, 7} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nn).k(kk);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25030
// m=2 with n in {5, 6, 7}, k in {1, 10, 19, 28, 37} and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nn).k(kk);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25048
// n in {5, 6, 7}, k in {1, 10, 19, 28, 37}, m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nn = 5; nn < 8; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25068
// m=2 with n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nn).k(kk);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25085
// m=2 with n in {8, 12}, k in {1, 10, 19, 28, 37} and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(2).sr(4);
      tester.m(2).n(nn).k(kk);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
25103
// n in {8, 12}, k in {1, 10, 19, 28, 37}, m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t nn = 8; nn <= 12; nn += 4) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mm).n(nn).k(kk);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25123
// m=2, n=4, ks=3 with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(2).sr(4);
    tester.m(2).n(4).k(kk);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
25139
// ks=3 with k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 2]; one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 4; ++nn) {
      for (uint32_t mm = 1; mm <= 2; ++mm) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(2).sr(4);
        tester.m(mm).n(nn).k(kk);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
25160
// n = 5..7 (above nr = 4) with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25178
// n = 8 and 12 (multiples of nr = 4) with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25196
// cm_stride(7) with k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..2; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25217
// Full 2x4 tile with ks(3) and a_offset(83), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_val)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25234
// ks(3) and a_offset(83) with zero_index taking each value 0..1, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(4).k(k_val)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25254
// Full 2x4 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25268
// Full 2x4 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25282
// Full 2x4 tile at k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25296 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25297
25298
25299 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile with k fixed at 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25312
// Full 3x4 tile at k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25326
// k = 8 with n = 1..4 and m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25344
// k = 8 and n = 4 with m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25360
// k = 8 and m = 3 with n = 1..4; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25376
// Full 3x4 tile for every k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25391
// k = 1..7 with n = 1..4 and m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25411
// Full 3x4 tile for every k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25426
// k = 9..15 with n = 1..4 and m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25446
// Full 3x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25461
// k = 16, 24, ..., 80 with n = 1..4 and m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25481
// n = 5..7 (above nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25498
// n = 5..7 (above nr = 4) with cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25516
// n = 5..7, k = 1, 10, 19, 28, 37, and m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25536
// n = 8 and 12 (multiples of nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25553
// n = 8 and 12 (multiples of nr = 4) with cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25571
// n = 8 and 12, k = 1, 10, 19, 28, 37, and m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25591
// Full 3x4 tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25607
// ks(3) with k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25628
// n = 5..7 (above nr = 4) with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25646
// n = 8 and 12 (multiples of nr = 4) with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25664
// cm_stride(7) with k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25685
// Full 3x4 tile with ks(3) and a_offset(127), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25702
// ks(3) and a_offset(127) with zero_index taking each value 0..2, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(4).k(k_val)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25722
// Full 3x4 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25736
// Full 3x4 tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25750
// Full 3x4 tile at k = 8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25764 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
25765
25766
25767 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile with k fixed at 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25780
// Full 3x4 tile at k = 8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
25794
// k = 8 with n = 1..4 and m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_val).n(n_val).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25812
// k = 8 and n = 4 with m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(m_val).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25828
// k = 8 and m = 3 with n = 1..4; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_val).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25844
// Full 3x4 tile for every k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25859
// k = 1..7 with n = 1..4 and m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25879
// Full 3x4 tile for every k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25894
// k = 9..15 with n = 1..4 and m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25914
// Full 3x4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
25929
// k = 16, 24, ..., 80 with n = 1..4 and m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
25949
// n = 5..7 (above nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25966
// n = 5..7 (above nr = 4) with cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
25984
// n = 5..7, k = 1, 10, 19, 28, 37, and m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26004
// n = 8 and 12 (multiples of nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26021
// n = 8 and 12 (multiples of nr = 4) with cn_stride(7), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26039
// n = 8 and 12, k = 1, 10, 19, 28, 37, and m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26059
// Full 3x4 tile with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
26075
// ks(3) with k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26096
// n = 5..7 (above nr = 4) with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26114
// n = 8 and 12 (multiples of nr = 4) with ks(3), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_val).k(k_val)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26132
// cm_stride(7) with k = 1, 10, 19, 28, 37, n = 1..4 and m = 1..3; a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_val).n(n_val).k(k_val)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
26153
// Full 3x4 tile with ks(3) and a_offset(127), for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_val)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
26170
// ks(3) and a_offset(127) with zero_index taking each value 0..2, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(4).k(k_val)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
26190
// Full 3x4 tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
26204
// Full 3x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
26218
// Full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
26232 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26233
26234
26235 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 3x4 tile (m = mr = 3, n = nr = 4) with k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
26248
// Full 3x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
26262
// Sweeps n over [1, 4] and m over [1, 3] at k = 8; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26280
// Sweeps m over [1, 3] with n = 4 and k = 8; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
26296
// Sweeps n over [1, 4] with m = 3 and k = 8; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
26312
// Full 3x4 tile for each k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
26327
// For each k in [1, 7], sweeps n over [1, 4] and m over [1, 3];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26347
// Full 3x4 tile for each k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
26362
// For each k in [9, 15], sweeps n over [1, 4] and m over [1, 3];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26382
// Full 3x4 tile for each k in {16, 24, ..., 80} (step 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
26397
// For each k in {16, 24, ..., 80}, sweeps n over [1, 4] and m over [1, 3];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26417
// m = 3 with n in [5, 7], each crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26434
// m = 3 with n in [5, 7], each crossed with k in {1, 10, 19, 28, 37},
// and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26452
// n in [5, 7] crossed with k in {1, 10, 19, 28, 37} and m in [1, 3];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26472
// m = 3 with n in {8, 12}, each crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26489
// m = 3 with n in {8, 12}, each crossed with k in {1, 10, 19, 28, 37},
// and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26507
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37} and m in [1, 3];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26527
// Full 3x4 tile with ks = 3 for each k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
26543
// With ks = 3: for each k in {1, 10, 19, 28, 37}, sweeps n over [1, 4] and
// m over [1, 3]; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26564
// With ks = 3 and m = 3: n in [5, 7] crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26582
// With ks = 3 and m = 3: n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26600
// For each k in {1, 10, 19, 28, 37}, sweeps n over [1, 4] and m over [1, 3]
// with cm_stride set to 7; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26621
// Full 3x4 tile with ks = 3 and a_offset = 127 for each k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(2).sr(4);
    tester.m(3).n(4).k(k_size);
    tester.ks(3).a_offset(127);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
26638
// Full 3x4 tile with ks = 3 and a_offset = 127; for each k in
// {1, 10, 19, 28, 37}, zero_index is swept over [0, 2].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(2).sr(4);
      tester.m(3).n(4).k(k_size);
      tester.ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26658
// Full 3x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
26672
// Full 3x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
26686
// Full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(2).sr(4);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
26700 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
26701
26702
26703 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 1x4 tile (m = mr = 1, n = nr = 4) with k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
26716
// Full 1x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
26730
// Sweeps n over [1, 4] and m over [1, 1] at k = 8; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26748
// Sweeps m over [1, 1] with n = 4 and k = 8; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
26764
// Sweeps n over [1, 4] with m = 1 and k = 8; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
26780
// Full 1x4 tile for each k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
26795
// For each k in [1, 7], sweeps n over [1, 4] and m over [1, 1];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26815
// Full 1x4 tile for each k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
26830
// For each k in [9, 15], sweeps n over [1, 4] and m over [1, 1];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26850
// Full 1x4 tile for each k in {16, 24, ..., 80} (step 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
26865
// For each k in {16, 24, ..., 80}, sweeps n over [1, 4] and m over [1, 1];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26885
// m = 1 with n in [5, 7], each crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26902
// m = 1 with n in [5, 7], each crossed with k in {1, 10, 19, 28, 37},
// and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26920
// n in [5, 7] crossed with k in {1, 10, 19, 28, 37} and m in [1, 1];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26940
// m = 1 with n in {8, 12}, each crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26957
// m = 1 with n in {8, 12}, each crossed with k in {1, 10, 19, 28, 37},
// and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
26975
// n in {8, 12} crossed with k in {1, 10, 19, 28, 37} and m in [1, 1];
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
26995
// Full 1x4 tile with ks = 3 for each k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
27011
// With ks = 3: for each k in {1, 10, 19, 28, 37}, sweeps n over [1, 4] and
// m over [1, 1]; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27032
// With ks = 3 and m = 1: n in [5, 7] crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27050
// With ks = 3 and m = 1: n in {8, 12} crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27068
// For each k in {1, 10, 19, 28, 37}, sweeps n over [1, 4] and m over [1, 1]
// with cm_stride set to 7; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
27089
// Full 1x4 tile with ks = 3 and a_offset = 43 for each k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_size);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
27106
// Full 1x4 tile with ks = 3 and a_offset = 43; for each k in
// {1, 10, 19, 28, 37}, zero_index takes the single value 0.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(4).k(k_size);
      tester.ks(3).a_offset(43).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27126
// Full 1x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
27140
// Full 1x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
27154
// Full 1x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
27168 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27169
27170
27171 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 4x4 tile (m = mr = 4, n = nr = 4) with k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
27184
// Full 4x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
27198
// Sweeps n over [1, 4] and m over [1, 4] at k = 8; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
27216
// Sweeps m over [1, 4] with n = 4 and k = 8; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
27232
// Sweeps n over [1, 4] with m = 4 and k = 8; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
27248
// Full 4x4 tile for each k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
27263
// For every k in [1, 7], sweeps all (m, n) with m, n in [1, 4]; one iteration
// per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
27283
// Runs the 4x4 problem for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
27298
// For every k in [9, 15], sweeps all (m, n) with m, n in [1, 4]; one iteration
// per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
27318
// Runs the 4x4 problem for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
27333
// For k = 16, 24, ..., 80, sweeps all (m, n) with m, n in [1, 4]; one
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
27353
// Covers n in [5, 7] (larger than nr = 4) with m = 4 and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
27370
// Covers n in [5, 7] with m = 4 and k = 1, 10, ..., 37, with cn_stride set
// to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
27388
// Covers n in [5, 7] and k = 1, 10, ..., 37 for every m in [1, 4]; one
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
27408
// Covers n = 8 and n = 12 (multiples of nr = 4) with m = 4 and
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
27425
// Covers n = 8 and n = 12 with m = 4 and k = 1, 10, ..., 37, with cn_stride
// set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
27443
// Covers n = 8 and n = 12 and k = 1, 10, ..., 37 for every m in [1, 4]; one
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
27463
// Runs the 4x4 problem with ks set to 3 for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size);
    tester.ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
27479
// With ks set to 3, sweeps k = 1, 10, ..., 37 and all (m, n) with m, n in
// [1, 4]; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
27500
// Covers n in [5, 7] with m = 4, k = 1, 10, ..., 37 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
27518
// Covers n = 8 and n = 12 with m = 4, k = 1, 10, ..., 37 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
27536
// With cm_stride set to 7, sweeps k = 1, 10, ..., 37 and all (m, n) with
// m, n in [1, 4]; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse2_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
27557
// Runs the 4x4 problem with ks set to 3 and a_offset set to 163 for
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size);
    tester.ks(3).a_offset(163);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
  }
}
27574
// With ks set to 3 and a_offset set to 163, tries every zero_index in [0, 3]
// for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(4).k(k_size);
      tester.ks(3).a_offset(163).zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
27594
// Runs the 4x4 problem at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
}
27608
// Runs the 4x4 problem at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
}
27622
// Runs the 4x4 problem at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
}
27636 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
27637
27638
27639 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 4x4 problem at k = 8 with no stride or range overrides.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
27652
// Runs the 4x4 problem at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
27666
// Sweeps every (m, n) combination with m, n in [1, 4] at k = 8, running a
// single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
27684
// Varies m over [1, 4] while n stays at 4 and k at 8; one iteration per m.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
27700
// Varies n over [1, 4] while m stays at 4 and k at 8; one iteration per n.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
27716
// Runs the 4x4 problem for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
27731
// For every k in [1, 7], sweeps all (m, n) with m, n in [1, 4]; one iteration
// per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
27751
// Runs the 4x4 problem for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
27766
// For every k in [9, 15], sweeps all (m, n) with m, n in [1, 4]; one iteration
// per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
27786
// Runs the 4x4 problem for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
27801
// For k = 16, 24, ..., 80, sweeps all (m, n) with m, n in [1, 4]; one
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
27821
// Covers n in [5, 7] (larger than nr = 4) with m = 4 and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
27838
// Covers n in [5, 7] with m = 4 and k = 1, 10, ..., 37, with cn_stride set
// to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
27856
// Covers n in [5, 7] and k = 1, 10, ..., 37 for every m in [1, 4]; one
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
27876
// Covers n = 8 and n = 12 (multiples of nr = 4) with m = 4 and
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
27893
// Covers n = 8 and n = 12 with m = 4 and k = 1, 10, ..., 37, with cn_stride
// set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
27911
// Covers n = 8 and n = 12 and k = 1, 10, ..., 37 for every m in [1, 4]; one
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
27931
// Runs the 4x4 problem with ks set to 3 for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size);
    tester.ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
27947
// With ks set to 3, sweeps k = 1, 10, ..., 37 and all (m, n) with m, n in
// [1, 4]; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
27968
// Covers n in [5, 7] with m = 4, k = 1, 10, ..., 37 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
27986
// Covers n = 8 and n = 12 with m = 4, k = 1, 10, ..., 37 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
28004
// With cm_stride set to 7, sweeps k = 1, 10, ..., 37 and all (m, n) with
// m, n in [1, 4]; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(4).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
28025
// Runs the 4x4 problem with ks set to 3 and a_offset set to 163 for
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(4).nr(4).kr(2).sr(4);
    tester.m(4).n(4).k(k_size);
    tester.ks(3).a_offset(163);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
28042
// With ks set to 3 and a_offset set to 163, tries every zero_index in [0, 3]
// for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester();
      tester.mr(4).nr(4).kr(2).sr(4);
      tester.m(4).n(4).k(k_size);
      tester.ks(3).a_offset(163).zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
28062
// Runs the 4x4 problem at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
28076
// Runs the 4x4 problem at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
28090
// Runs the 4x4 problem at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  auto tester = GemmMicrokernelTester();
  tester.mr(4).nr(4).kr(2).sr(4);
  tester.m(4).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__sse41_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
28104 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28105
28106
28107 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 1x4 problem at k = 8 with no stride or range overrides.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
28120
// Runs the 1x4 problem at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(4).kr(2).sr(4);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
}
28134
// Sweeps n over [1, 4] at k = 8; the m loop spans [1, 1], so it runs once per
// n. One iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
28152
// Varies m over [1, 1] (a single value) with n = 4 and k = 8; one iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
28168
// Varies n over [1, 4] while m stays at 1 and k at 8; one iteration per n.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
28184
// Runs the 1x4 problem for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
28199
// For every k in [1, 7], sweeps n over [1, 4]; the m loop spans [1, 1]. One
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
28219
// Runs the 1x4 problem for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
28234
// For every k in [9, 15], sweeps n over [1, 4]; the m loop spans [1, 1]. One
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
28254
// Runs the 1x4 problem for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(4).kr(2).sr(4);
    tester.m(1).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
  }
}
28269
// For k = 16, 24, ..., 80, sweeps n over [1, 4]; the m loop spans [1, 1]. One
// iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(4).kr(2).sr(4);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                    xnn_init_qs8_conv_minmax_fp32_sse4_params,
                    xnn_qs8_requantize_fp32);
      }
    }
  }
}
28289
// Covers n in [5, 7] (larger than nr = 4) with m = 1 and k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(4).kr(2).sr(4);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
    }
  }
}
28306
// Same sweep as n_gt_4 (n in [5, 8), k = 1, 10, ..., 37, m = 1) but with
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28324
// Runs the 1x4c2s4 AVX LD128 kernel with n in [5, 8), k = 1, 10, ..., 37 and
// every m <= 1, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28344
// Runs the 1x4c2s4 AVX LD128 kernel with n = 8 and 12 and k = 1, 10, ..., 37
// at m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28361
// Same sweep as n_div_4 (n = 8 and 12, k = 1, 10, ..., 37, m = 1) but with
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28379
// Runs the 1x4c2s4 AVX LD128 kernel with n = 8 and 12, k = 1, 10, ..., 37 and
// every m <= 1, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28399
// Runs the 1x4c2s4 AVX LD128 kernel on a full 1x4 tile with ks set to 3 for
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28415
// Runs the 1x4c2s4 AVX LD128 kernel with ks set to 3 for k = 1, 10, ..., 37
// over every (m, n) pair with m <= 1 and n <= 4, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28436
// Runs the 1x4c2s4 AVX LD128 kernel with ks set to 3 for n in [5, 8) and
// k = 1, 10, ..., 37 at m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28454
// Runs the 1x4c2s4 AVX LD128 kernel with ks set to 3 for n = 8 and 12 and
// k = 1, 10, ..., 37 at m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28472
// Runs the 1x4c2s4 AVX LD128 kernel with cm_stride set to 7 for
// k = 1, 10, ..., 37 over every (m, n) pair with m <= 1 and n <= 4,
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28493
// Runs the 1x4c2s4 AVX LD128 kernel on a full 1x4 tile with ks set to 3 and
// a_offset set to 43 for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28510
// Same configuration as a_offset (ks = 3, a_offset = 43, k = 1, 10, ..., 37)
// with zero_index additionally swept over [0, 1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(4).k(k_size)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28530
// Runs the 1x4c2s4 AVX LD128 kernel on a full 1x4 tile at k = 8 with qmin
// set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
28544
// Runs the 1x4c2s4 AVX LD128 kernel on a full 1x4 tile at k = 8 with qmax
// set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
28558
// Runs the 1x4c2s4 AVX LD128 kernel on a full 1x4 tile at k = 8 with
// cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
28572 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
28573
28574
28575 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 2x4c2s4 AVX LD128 kernel on a full 2x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
28588
// Runs the 2x4c2s4 AVX LD128 kernel on a full 2x4 tile at k = 8 with
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
28602
// Runs the 2x4c2s4 AVX LD128 kernel at k = 8 over every (m, n) pair with
// m <= 2 and n <= 4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28620
// Runs the 2x4c2s4 AVX LD128 kernel at k = 8 and n = 4 for every m <= 2,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28636
// Runs the 2x4c2s4 AVX LD128 kernel at k = 8 and m = 2 for every n <= 4,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28652
// Runs the 2x4c2s4 AVX LD128 kernel on a full 2x4 tile for k in [1, 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28667
// Runs the 2x4c2s4 AVX LD128 kernel for k in [1, 8) over every (m, n) pair
// with m <= 2 and n <= 4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28687
// Runs the 2x4c2s4 AVX LD128 kernel on a full 2x4 tile for k in [9, 16).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28702
// Runs the 2x4c2s4 AVX LD128 kernel for k in [9, 16) over every (m, n) pair
// with m <= 2 and n <= 4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28722
// Runs the 2x4c2s4 AVX LD128 kernel on a full 2x4 tile for k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28737
// Runs the 2x4c2s4 AVX LD128 kernel for k = 16, 24, ..., 80 over every (m, n)
// pair with m <= 2 and n <= 4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28757
// Runs the 2x4c2s4 AVX LD128 kernel with n in [5, 8) and k = 1, 10, ..., 37
// at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28774
// Same sweep as n_gt_4 (n in [5, 8), k = 1, 10, ..., 37, m = 2) but with
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28792
// Runs the 2x4c2s4 AVX LD128 kernel with n in [5, 8), k = 1, 10, ..., 37 and
// every m <= 2, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28812
// Runs the 2x4c2s4 AVX LD128 kernel with n = 8 and 12 and k = 1, 10, ..., 37
// at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28829
// Same sweep as n_div_4 (n = 8 and 12, k = 1, 10, ..., 37, m = 2) but with
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28847
// Runs the 2x4c2s4 AVX LD128 kernel with n = 8 and 12, k = 1, 10, ..., 37 and
// every m <= 2, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28867
// Runs the 2x4c2s4 AVX LD128 kernel on a full 2x4 tile with ks set to 3 for
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28883
// Runs the 2x4c2s4 AVX LD128 kernel with ks set to 3 for k = 1, 10, ..., 37
// over every (m, n) pair with m <= 2 and n <= 4, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28904
// Runs the 2x4c2s4 AVX LD128 kernel with ks set to 3 for n in [5, 8) and
// k = 1, 10, ..., 37 at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28922
// Runs the 2x4c2s4 AVX LD128 kernel with ks set to 3 for n = 8 and 12 and
// k = 1, 10, ..., 37 at m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28940
// Runs the 2x4c2s4 AVX LD128 kernel with cm_stride set to 7 for
// k = 1, 10, ..., 37 over every (m, n) pair with m <= 2 and n <= 4,
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
28961
// Runs the 2x4c2s4 AVX LD128 kernel on a full 2x4 tile with ks set to 3 and
// a_offset set to 83 for k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(4)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
28978
// Same configuration as a_offset (ks = 3, a_offset = 83, k = 1, 10, ..., 37)
// with zero_index additionally swept over [0, 2).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(4)
          .m(2).n(4).k(k_size)
          .ks(3)
          .a_offset(83)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
28998
// Runs the 2x4c2s4 AVX LD128 kernel on a full 2x4 tile at k = 8 with qmin
// set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29012
// Runs the 2x4c2s4 AVX LD128 kernel on a full 2x4 tile at k = 8 with qmax
// set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29026
// Runs the 2x4c2s4 AVX LD128 kernel on a full 2x4 tile at k = 8 with
// cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2S4__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(4)
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29040 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29041
29042
29043 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 3x4c2s4 XOP LD128 kernel on a full 3x4 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29056
// Runs the 3x4c2s4 XOP LD128 kernel on a full 3x4 tile at k = 8 with
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29070
// Runs the 3x4c2s4 XOP LD128 kernel at k = 8 over every (m, n) pair with
// m <= 3 and n <= 4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(m_size).n(n_size).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29088
// Runs the 3x4c2s4 XOP LD128 kernel at k = 8 and n = 4 for every m <= 3,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(m_size).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29104
// Runs the 3x4c2s4 XOP LD128 kernel at k = 8 and m = 3 for every n <= 4,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29120
// Runs the 3x4c2s4 XOP LD128 kernel on a full 3x4 tile for k in [1, 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29135
// Runs the 3x4c2s4 XOP LD128 kernel for k in [1, 8) over every (m, n) pair
// with m <= 3 and n <= 4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29155
// Runs the 3x4c2s4 XOP LD128 kernel on a full 3x4 tile for k in [9, 16).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29170
// Runs the 3x4c2s4 XOP LD128 kernel for k in [9, 16) over every (m, n) pair
// with m <= 3 and n <= 4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29190
// Runs the 3x4c2s4 XOP LD128 kernel on a full 3x4 tile for k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29205
// Runs the 3x4c2s4 XOP LD128 kernel for k = 16, 24, ..., 80 over every (m, n)
// pair with m <= 3 and n <= 4, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29225
// Runs the 3x4c2s4 XOP LD128 kernel with n in [5, 8) and k = 1, 10, ..., 37
// at m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29242
// Same sweep as n_gt_4 (n in [5, 8), k = 1, 10, ..., 37, m = 3) but with
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29260
// Runs the 3x4c2s4 XOP LD128 kernel with n in [5, 8), k = 1, 10, ..., 37 and
// every m <= 3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29280
// Runs the 3x4c2s4 XOP LD128 kernel with n = 8 and 12 and k = 1, 10, ..., 37
// at m = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29297
// Same sweep as n_div_4 (n = 8 and 12, k = 1, 10, ..., 37, m = 3) but with
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29315
// Runs the 3x4c2s4 XOP LD128 kernel with n = 8 and 12, k = 1, 10, ..., 37 and
// every m <= 3, one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(2).sr(4)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29335
// Runs the 3x4c2s4 XOP LD128 kernel on a full 3x4 tile with ks set to 3 for
// k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29351
// 3x4c2s4 XOP ld128 kernel with ks = 3: k = 1, 10, 19, 28, 37 crossed with
// every n in 1..4 and every m in 1..3, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29372
// 3x4c2s4 XOP ld128 kernel with ks = 3: m = 3, n = 5, 6, 7 (above nr = 4),
// each combined with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29390
// 3x4c2s4 XOP ld128 kernel with ks = 3: m = 3, n = 8 and 12 (multiples of
// nr = 4), each combined with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29408
// 3x4c2s4 XOP ld128 kernel with cm_stride = 7: k = 1, 10, 19, 28, 37 crossed
// with every n in 1..4 and every m in 1..3, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 3; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29429
// 3x4c2s4 XOP ld128 kernel: m = 3, n = 4, ks = 3, a_offset = 127, with
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29446
// 3x4c2s4 XOP ld128 kernel: m = 3, n = 4, ks = 3, a_offset = 127, with
// k = 1, 10, 19, 28, 37 and zero_index taking 0, 1, 2.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(2).sr(4)
          .m(3).n(4).k(k_dim)
          .ks(3)
          .a_offset(127)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29466
// 3x4c2s4 XOP ld128 kernel: single m = 3, n = 4, k = 8 case with qmin = 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29480
// 3x4c2s4 XOP ld128 kernel: single m = 3, n = 4, k = 8 case with qmax = 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29494
// 3x4c2s4 XOP ld128 kernel: single m = 3, n = 4, k = 8 case with
// cm_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29508 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29509
29510
29511 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 4x4c2s4 XOP ld128 kernel: single m = 4, n = 4, k = 8 case.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29524
// 4x4c2s4 XOP ld128 kernel: single m = 4, n = 4, k = 8 case with
// cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29538
// 4x4c2s4 XOP ld128 kernel at k = 8: every n in 1..4 crossed with every m in
// 1..4, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29556
// 4x4c2s4 XOP ld128 kernel at k = 8, n = 4: every m in 1..4, iterations(1)
// each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29572
// 4x4c2s4 XOP ld128 kernel at k = 8, m = 4: every n in 1..4, iterations(1)
// each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29588
// 4x4c2s4 XOP ld128 kernel: m = 4, n = 4 with every k from 1 to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29603
// 4x4c2s4 XOP ld128 kernel: every k in 1..7 crossed with every n in 1..4 and
// every m in 1..4, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29623
// 4x4c2s4 XOP ld128 kernel: m = 4, n = 4 with every k from 9 to 15.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29638
// 4x4c2s4 XOP ld128 kernel: every k in 9..15 crossed with every n in 1..4 and
// every m in 1..4, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29658
// 4x4c2s4 XOP ld128 kernel: m = 4, n = 4 with k = 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29673
// 4x4c2s4 XOP ld128 kernel: k = 16, 24, ..., 80 crossed with every n in 1..4
// and every m in 1..4, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29693
// 4x4c2s4 XOP ld128 kernel: m = 4 with n = 5, 6, 7 (above nr = 4), each
// combined with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29710
// 4x4c2s4 XOP ld128 kernel with cn_stride = 7: m = 4, n = 5, 6, 7, each
// combined with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29728
// 4x4c2s4 XOP ld128 kernel: n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37
// and every m in 1..4, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29748
// 4x4c2s4 XOP ld128 kernel: m = 4 with n = 8 and 12 (multiples of nr = 4),
// each combined with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29765
// 4x4c2s4 XOP ld128 kernel with cn_stride = 7: m = 4, n = 8 and 12, each
// combined with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29783
// 4x4c2s4 XOP ld128 kernel: n = 8 and 12 crossed with k = 1, 10, 19, 28, 37
// and every m in 1..4, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29803
// 4x4c2s4 XOP ld128 kernel: m = 4, n = 4, ks = 3, with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29819
// 4x4c2s4 XOP ld128 kernel with ks = 3: k = 1, 10, 19, 28, 37 crossed with
// every n in 1..4 and every m in 1..4, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29840
// 4x4c2s4 XOP ld128 kernel with ks = 3: m = 4, n = 5, 6, 7 (above nr = 4),
// each combined with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29858
// 4x4c2s4 XOP ld128 kernel with ks = 3: m = 4, n = 8 and 12 (multiples of
// nr = 4), each combined with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29876
// 4x4c2s4 XOP ld128 kernel with cm_stride = 7: k = 1, 10, 19, 28, 37 crossed
// with every n in 1..4 and every m in 1..4, iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                  xnn_init_qs8_conv_minmax_fp32_sse4_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
29897
// 4x4c2s4 XOP ld128 kernel: m = 4, n = 4, ks = 3, a_offset = 163, with
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(2).sr(4)
        .m(4).n(4).k(k_dim)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
  }
}
29914
// 4x4c2s4 XOP ld128 kernel: m = 4, n = 4, ks = 3, a_offset = 163, with
// k = 1, 10, 19, 28, 37 and zero_index taking 0, 1, 2, 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(2).sr(4)
          .m(4).n(4).k(k_dim)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
29934
// 4x4c2s4 XOP ld128 kernel: single m = 4, n = 4, k = 8 case with qmin = 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29948
// 4x4c2s4 XOP ld128 kernel: single m = 4, n = 4, k = 8 case with qmax = 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29962
// 4x4c2s4 XOP ld128 kernel: single m = 4, n = 4, k = 8 case with
// cm_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C2S4__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(2).sr(4)
      .m(4).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
}
29976 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
29977
29978
29979 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c8 SSE2 ld64 kernel: single m = 1, n = 4, k = 8 case.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
29992
// 1x4c8 SSE2 ld64 kernel: single m = 1, n = 4, k = 8 case with cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
}
30006
// 1x4c8 SSE2 ld64 kernel at k = 8: every n in 1..4 with the m loop covering
// only m = 1 (mr is 1), iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30024
// 1x4c8 SSE2 ld64 kernel at k = 8, n = 4: the m loop covers only m = 1
// (mr is 1), with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
30040
// 1x4c8 SSE2 ld64 kernel at k = 8, m = 1: every n in 1..4, iterations(1)
// each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
30056
// 1x4c8 SSE2 ld64 kernel: m = 1, n = 4 with every k from 1 to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
30071
// 1x4c8 SSE2 ld64 kernel: every k in 1..7 crossed with every n in 1..4; the
// m loop covers only m = 1. iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30091
// 1x4c8 SSE2 ld64 kernel: m = 1, n = 4 with every k from 9 to 15.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
30106
// 1x4c8 SSE2 ld64 kernel: every k in 9..15 crossed with every n in 1..4; the
// m loop covers only m = 1. iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30126
// 1x4c8 SSE2 ld64 kernel: m = 1, n = 4 with k = 16, 24, ..., 80 (step 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
30141
// 1x4c8 SSE2 ld64 kernel: k = 16, 24, ..., 80 crossed with every n in 1..4;
// the m loop covers only m = 1. iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30161
// 1x4c8 SSE2 ld64 kernel: m = 1 with n = 5, 6, 7 (above nr = 4), each
// combined with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30178
// 1x4c8 SSE2 ld64 kernel with cn_stride = 7: m = 1, n = 5, 6, 7, each
// combined with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30196
// 1x4c8 SSE2 ld64 kernel: n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37; the
// m loop covers only m = 1. iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30216
// 1x4c8 SSE2 ld64 kernel: m = 1 with n = 8 and 12 (multiples of nr = 4), each
// combined with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30233
// 1x4c8 SSE2 ld64 kernel with cn_stride = 7: m = 1, n = 8 and 12, each
// combined with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30251
// 1x4c8 SSE2 ld64 kernel: n = 8 and 12 crossed with k = 1, 10, 19, 28, 37;
// the m loop covers only m = 1. iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30271
// 1x4c8 SSE2 ld64 kernel: m = 1, n = 4, ks = 3, with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
30287
// 1x4c8 SSE2 ld64 kernel with ks = 3: k = 1, 10, 19, 28, 37 crossed with
// every n in 1..4; the m loop covers only m = 1. iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30308
// 1x4c8 SSE2 ld64 kernel with ks = 3: m = 1, n = 5, 6, 7 (above nr = 4), each
// combined with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30326
// 1x4c8 SSE2 ld64 kernel with ks = 3: m = 1, n = 8 and 12 (multiples of
// nr = 4), each combined with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30344
// 1x4c8 SSE2 ld64 kernel with cm_stride = 7: k = 1, 10, 19, 28, 37 crossed
// with every n in 1..4; the m loop covers only m = 1. iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_sse2_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
30365
// 1x4c8 SSE2 ld64 kernel: m = 1, n = 4, ks = 3, a_offset = 43, with
// k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
  }
}
30382
// 1x4c8 SSE2 ld64 kernel: m = 1, n = 4, ks = 3, a_offset = 43, with
// k = 1, 10, 19, 28, 37; the zero_index loop covers only 0.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(8).sr(1)
          .m(1).n(4).k(k_dim)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
30402
// Single m=1, n=4, k=8 case with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
30416
// Single m=1, n=4, k=8 case with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
30430
// Single m=1, n=4, k=8 case with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
30444 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30445
30446
30447 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full-tile case: m=2, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
30460
// Single m=2, n=4, k=8 case with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
30474
// k=8 with every (n, m) pair in 1..4 x 1..2, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30492
// k=8, n=4, sweeping m over 1..2 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
30508
// k=8, m=2, sweeping n over 1..4 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
30524
// Full m=2, n=4 tile for each k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
30539
// Each k in 1..7 crossed with every (n, m) pair in 1..4 x 1..2, using
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30559
// Full m=2, n=4 tile for each k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
30574
// Each k in 9..15 crossed with every (n, m) pair in 1..4 x 1..2, using
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30594
// Full m=2, n=4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
30609
// k = 16, 24, ..., 80 crossed with every (n, m) pair in 1..4 x 1..2, using
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30629
// m=2 with n in 5..7, each crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30646
// m=2 with n in 5..7 and k in {1, 10, 19, 28, 37}, with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30664
// n in 5..7, k in {1, 10, 19, 28, 37}, and m in 1..2, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30684
// m=2 with n in {8, 12}, each crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30701
// m=2 with n in {8, 12} and k in {1, 10, 19, 28, 37}, with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30719
// n in {8, 12}, k in {1, 10, 19, 28, 37}, and m in 1..2, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30739
// Full m=2, n=4 tile with ks(3), for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
30755
// ks(3) with k in {1, 10, 19, 28, 37} crossed with every (n, m) pair in
// 1..4 x 1..2, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30776
// m=2 with n in 5..7 and k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30794
// m=2 with n in {8, 12} and k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30812
// k in {1, 10, 19, 28, 37} crossed with every (n, m) pair in 1..4 x 1..2,
// with cm_stride(7) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
30833
// Full m=2, n=4 tile with ks(3) and a_offset(83), for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
30850
// Same setup as the a_offset test (ks(3), a_offset(83)), additionally passing
// each zero_index in 0..1 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30870
// Single m=2, n=4, k=8 case with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
30884
// Single m=2, n=4, k=8 case with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
30898
// Single m=2, n=4, k=8 case with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
30912 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
30913
30914
30915 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full-tile case: m=3, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
30928
// Single m=3, n=4, k=8 case with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
30942
// k=8 with every (n, m) pair in 1..4 x 1..3, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
30960
// k=8, n=4, sweeping m over 1..3 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
30976
// k=8, m=3, sweeping n over 1..4 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
30992
// Full m=3, n=4 tile for each k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
31007
// Each k in 1..7 crossed with every (n, m) pair in 1..4 x 1..3, using
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31027
// Full m=3, n=4 tile for each k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
31042
// Each k in 9..15 crossed with every (n, m) pair in 1..4 x 1..3, using
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31062
// Full m=3, n=4 tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
31077
// k = 16, 24, ..., 80 crossed with every (n, m) pair in 1..4 x 1..3, using
// iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31097
// m=3 with n in 5..7, each crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31114
// m=3 with n in 5..7 and k in {1, 10, 19, 28, 37}, with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31132
// n in 5..7, k in {1, 10, 19, 28, 37}, and m in 1..3, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31152
// m=3 with n in {8, 12}, each crossed with k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31169
// m=3 with n in {8, 12} and k in {1, 10, 19, 28, 37}, with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31187
// n in {8, 12}, k in {1, 10, 19, 28, 37}, and m in 1..3, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31207
// Full m=3, n=4 tile with ks(3), for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
31223
// ks(3) with k in {1, 10, 19, 28, 37} crossed with every (n, m) pair in
// 1..4 x 1..3, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31244
// m=3 with n in 5..7 and k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31262
// m=3 with n in {8, 12} and k in {1, 10, 19, 28, 37}, with ks(3).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31280
// k in {1, 10, 19, 28, 37} crossed with every (n, m) pair in 1..4 x 1..3,
// with cm_stride(7) and iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31301
// Full m=3, n=4 tile with ks(3) and a_offset(127), for k in
// {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, a_offset) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
31318
// Same setup as the a_offset test (ks(3), a_offset(127)), additionally passing
// each zero_index in 0..2 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, zero) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31338
// Single m=3, n=4, k=8 case with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, qmin) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
31352
// Single m=3, n=4, k=8 case with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, qmax) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
31366
// Single m=3, n=4, k=8 case with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
31380 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
31381
31382
31383 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single full-tile case: m=3, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31396
// Single m=3, n=4, k=8 case with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31410
// k=8 with every (n, m) pair in 1..4 x 1..3, using iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31428
// k=8, n=4, sweeping m over 1..3 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31444
// k=8, m=3, sweeping n over 1..4 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31460
// Full 3x4 tile with k swept over 1..7 (every value below 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31475
// k in 1..7 crossed with every n in [1, 4] and m in [1, 3], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31495
// Full 3x4 tile with k swept over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31510
// k in 9..15 crossed with every n in [1, 4] and m in [1, 3], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31530
// Full 3x4 tile with k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31545
// k = 16, 24, ..., 80 crossed with every n in [1, 4] and m in [1, 3], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31565
// n = 5, 6, 7 (above the nr(4) setting) with m = 3 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31582
// n = 5, 6, 7 with m = 3, k = 1, 10, 19, 28, 37 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31600
// n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37 and m in [1, 3], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31620
// n = 8 and 12 (multiples of the nr(4) setting) with m = 3 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31637
// n = 8 and 12 with m = 3, k = 1, 10, 19, 28, 37 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31655
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37 and m in [1, 3], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31675
// Full 3x4 tile with ks set to 3 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31691
// ks = 3: k = 1, 10, 19, 28, 37 crossed with every n in [1, 4] and m in [1, 3],
// one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31712
// ks = 3 with n = 5, 6, 7, m = 3 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31730
// ks = 3 with n = 8 and 12, m = 3 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31748
// cm_stride = 7: k = 1, 10, 19, 28, 37 crossed with every n in [1, 4] and m in [1, 3],
// one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31769
// Full 3x4 tile with ks = 3 and a_offset = 127, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31786
// Full 3x4 tile with ks = 3 and a_offset = 127; for each k in 1, 10, 19, 28, 37
// the zero_index setting is swept over 0, 1, 2.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(depth)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31806
// Full 3x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31820
// Full 3x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31834
// Full 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD64, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31848 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
31849
31850
31851 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 2x4 tile with k = 8, matching the kr(8) setting.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31864
// Full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
31878
// k = 8: every n in [1, 4] paired with every m in [1, 2], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
31896
// k = 8, n = 4: m swept over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31912
// k = 8, m = 2: n swept over [1, 4], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31928
// Full 2x4 tile with k swept over 1..7 (every value below 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31943
// k in 1..7 crossed with every n in [1, 4] and m in [1, 2], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31963
// Full 2x4 tile with k swept over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
31978
// k in 9..15 crossed with every n in [1, 4] and m in [1, 2], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
31998
// Full 2x4 tile with k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32013
// k = 16, 24, ..., 80 crossed with every n in [1, 4] and m in [1, 2], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32033
// n = 5, 6, 7 (above the nr(4) setting) with m = 2 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32050
// n = 5, 6, 7 with m = 2, k = 1, 10, 19, 28, 37 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32068
// n = 5, 6, 7 crossed with k = 1, 10, 19, 28, 37 and m in [1, 2], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32088
// n = 8 and 12 (multiples of the nr(4) setting) with m = 2 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32105
// n = 8 and 12 with m = 2, k = 1, 10, 19, 28, 37 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32123
// n = 8 and 12 crossed with k = 1, 10, 19, 28, 37 and m in [1, 2], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32143
// Full 2x4 tile with ks set to 3 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32159
// ks = 3: k = 1, 10, 19, 28, 37 crossed with every n in [1, 4] and m in [1, 2],
// one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32180
// ks = 3 with n = 5, 6, 7, m = 2 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32198
// ks = 3 with n = 8 and 12, m = 2 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32216
// cm_stride = 7: k = 1, 10, 19, 28, 37 crossed with every n in [1, 4] and m in [1, 2],
// one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32237
// Full 2x4 tile with ks = 3 and a_offset = 83, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32254
// Full 2x4 tile with ks = 3 and a_offset = 83; for each k in 1, 10, 19, 28, 37
// the zero_index setting is swept over 0 and 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 40; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(depth)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32274
// Full 2x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32288
// Full 2x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32302
// Full 2x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32316 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
32317
32318
32319 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Full 2x4 tile with k = 8, matching the kr(8) setting.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32332
// Full 2x4 tile at k = 8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32346
// k = 8: every n in [1, 4] paired with every m in [1, 2], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32364
// k = 8, n = 4: m swept over [1, 2], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32380
// k = 8, m = 2: n swept over [1, 4], one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32396
// Full 2x4 tile with k swept over 1..7 (every value below 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32411
// k in 1..7 crossed with every n in [1, 4] and m in [1, 2], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 1; depth < 8; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32431
// Full 2x4 tile with k swept over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32446
// k in 9..15 crossed with every n in [1, 4] and m in [1, 2], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 9; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32466
// Full 2x4 tile with k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32481
// k = 16, 24, ..., 80 crossed with every n in [1, 4] and m in [1, 2], one iteration per shape.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t depth = 16; depth <= 80; depth += 8) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32501
// n = 5, 6, 7 (above the nr(4) setting) with m = 2 and k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32518
// n = 5, 6, 7 with m = 2, k = 1, 10, 19, 28, 37 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 40; depth += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32536
// n = 5, 6, 7 and k = 1, 10, 19, 28, 37, each combined with m = 1 and m = 2;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32556
// Runs with n = 8 and n = 12 (multiples of nr = 4) and m = 2,
// for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32573
// Same sweep as n_div_4 (n = 8, 12; k = 1, 10, 19, 28, 37; m = 2),
// with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32591
// n = 8, 12 and k = 1, 10, 19, 28, 37, each combined with m = 1 and m = 2;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32611
// m = 2, n = 4 with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32627
// ks = 3 for k = 1, 10, 19, 28, 37 across every (m, n) with
// 1 <= m <= 2 and 1 <= n <= 4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32648
// n = 5, 6, 7 with ks set to 3 and m = 2, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32666
// n = 8, 12 with ks set to 3 and m = 2, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32684
// cm_stride = 7 for k = 1, 10, 19, 28, 37 across every (m, n) with
// 1 <= m <= 2 and 1 <= n <= 4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(2).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32705
// m = 2, n = 4, ks = 3 with a_offset set to 83, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(2).nr(4).kr(8).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.ks(3).a_offset(83);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32722
// m = 2, n = 4, ks = 3, a_offset = 83 for k = 1, 10, 19, 28, 37,
// with zero_index taking each value 0 and 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(2).nr(4).kr(8).sr(1);
      tester.m(2).n(4).k(k_size);
      tester.ks(3).a_offset(83).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32742
// Single m = 2, n = 4, k = 8 run with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32756
// Single m = 2, n = 4, k = 8 run with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32770
// Single m = 2, n = 4, k = 8 run with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(2).nr(4).kr(8).sr(1);
  tester.m(2).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32784 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
32785
32786
32787 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 3x4c8 AVX kernel with m = 3, n = 4 and k = 8 (equal to kr).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32800
// Single m = 3, n = 4, k = 8 run with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
32814
// k = 8 across every (m, n) with 1 <= m <= 3 and 1 <= n <= 4;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32832
// k = 8, n = 4 with m = 1, 2, 3; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32848
// k = 8, m = 3 with n = 1, 2, 3, 4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32864
// m = 3, n = 4 for k = 1 through 7 (all below kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32879
// k = 1 through 7 across every (m, n) with 1 <= m <= 3 and 1 <= n <= 4;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32899
// m = 3, n = 4 for k = 9 through 15 (above kr = 8, below 2 * kr).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32914
// k = 9 through 15 across every (m, n) with 1 <= m <= 3 and 1 <= n <= 4;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32934
// m = 3, n = 4 for k = 16, 24, ..., 80 (every k is a multiple of kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
32949
// k = 16, 24, ..., 80 across every (m, n) with 1 <= m <= 3 and 1 <= n <= 4;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
32969
// n = 5, 6, 7 (larger than nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
32986
// n = 5, 6, 7 with m = 3 and cn_stride set to 7, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33004
// n = 5, 6, 7 and k = 1, 10, 19, 28, 37, each combined with m = 1, 2, 3;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33024
// n = 8 and n = 12 (multiples of nr = 4) with m = 3,
// for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33041
// n = 8, 12 with m = 3 and cn_stride set to 7, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33059
// n = 8, 12 and k = 1, 10, 19, 28, 37, each combined with m = 1, 2, 3;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33079
// m = 3, n = 4 with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33095
// ks = 3 for k = 1, 10, 19, 28, 37 across every (m, n) with
// 1 <= m <= 3 and 1 <= n <= 4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33116
// n = 5, 6, 7 with ks set to 3 and m = 3, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33134
// n = 8, 12 with ks set to 3 and m = 3, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33152
// cm_stride = 7 for k = 1, 10, 19, 28, 37 across every (m, n) with
// 1 <= m <= 3 and 1 <= n <= 4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33173
// m = 3, n = 4, ks = 3 with a_offset set to 127, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.ks(3).a_offset(127);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33190
// m = 3, n = 4, ks = 3, a_offset = 127 for k = 1, 10, 19, 28, 37,
// with zero_index taking each value 0, 1 and 2.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(4).k(k_size);
      tester.ks(3).a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33210
// Single m = 3, n = 4, k = 8 run with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33224
// Single m = 3, n = 4, k = 8 run with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33238
// Single m = 3, n = 4, k = 8 run with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__AVX_LD64, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33252 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
33253
33254
33255 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the 3x4c8 XOP kernel with m = 3, n = 4 and k = 8 (equal to kr).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33268
// Single m = 3, n = 4, k = 8 run with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
33282
// k = 8 across every (m, n) with 1 <= m <= 3 and 1 <= n <= 4;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33300
// k = 8, n = 4 with m = 1, 2, 3; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(m_size).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33316
// k = 8, m = 3 with n = 1, 2, 3, 4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(n_size).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33332
// m = 3, n = 4 for k = 1 through 7 (all below kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33347
// k = 1 through 7 across every (m, n) with 1 <= m <= 3 and 1 <= n <= 4;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33367
// m = 3, n = 4 for k = 9 through 15 (above kr = 8, below 2 * kr).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33382
// k = 9 through 15 across every (m, n) with 1 <= m <= 3 and 1 <= n <= 4;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33402
// m = 3, n = 4 for k = 16, 24, ..., 80 (every k is a multiple of kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33417
// k = 16, 24, ..., 80 across every (m, n) with 1 <= m <= 3 and 1 <= n <= 4;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33437
// n = 5, 6, 7 (larger than nr = 4) with m = 3, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33454
// n = 5, 6, 7 with m = 3 and cn_stride set to 7, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33472
// n = 5, 6, 7 and k = 1, 10, 19, 28, 37, each combined with m = 1, 2, 3;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33492
// n = 8 and n = 12 (multiples of nr = 4) with m = 3,
// for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33509
// n = 8, 12 with m = 3 and cn_stride set to 7, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33527
// n = 8, 12 and k = 1, 10, 19, 28, 37, each combined with m = 1, 2, 3;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33547
// m = 3, n = 4 with ks set to 3, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
33563
// ks = 3 for k = 1, 10, 19, 28, 37 across every (m, n) with
// 1 <= m <= 3 and 1 <= n <= 4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
33584
// n = 5, 6, 7 with ks set to 3 and m = 3, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
33602
// ks(3) with n over {8, 12} and k over {1, 10, 19, 28, 37}; m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33620
// cm_stride(7) with k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 3];
// iterations(1) for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
                xnn_init_qs8_conv_minmax_fp32_sse4_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33641
// Full 3x4 tile with ks(3) and a_offset(127), k over {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
  }
}
33658
// Full 3x4 tile with ks(3) and a_offset(127); zero_index takes each value in
// [0, 2] for every k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
              xnn_init_qs8_conv_minmax_fp32_sse4_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33678
// Single run at m=3, n=4, k=8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmin) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
33692
// Single run at m=3, n=4, k=8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, qmax) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
33706
// Single run at m=3, n=4, k=8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD64, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
}
33720 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
33721
33722
33723 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=1, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
33736
// Single run at m=1, n=4, k=8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
33750
// k fixed at 8; n over [1, 4] and m over [1, 1], iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 1; ++mc) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33768
// k fixed at 8 and n at 4; m over [1, 1], iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 1; ++mc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
33784
// k fixed at 8 and m at 1; n over [1, 4], iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
33800
// Full 1x4 tile with k over [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
33815
// k over [1, 7], n over [1, 4] and m over [1, 1]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33835
// Full 1x4 tile with k over [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
33850
// k over [9, 15], n over [1, 4] and m over [1, 1]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33870
// Full 1x4 tile with k over 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
33885
// k over 16, 24, ..., 80, n over [1, 4] and m over [1, 1]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33905
// n over [5, 7] and k over {1, 10, 19, 28, 37}; m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33922
// cn_stride(7) with n over [5, 7] and k over {1, 10, 19, 28, 37}; m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33940
// n over [5, 7], k over {1, 10, 19, 28, 37} and m over [1, 1]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
33960
// n over {8, 12} and k over {1, 10, 19, 28, 37}; m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33977
// cn_stride(7) with n over {8, 12} and k over {1, 10, 19, 28, 37}; m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
33995
// n over {8, 12}, k over {1, 10, 19, 28, 37} and m over [1, 1]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34015
// Full 1x4 tile with ks(3), k over {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
34031
// ks(3) with k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 1];
// iterations(1) for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34052
// ks(3) with n over [5, 7] and k over {1, 10, 19, 28, 37}; m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34070
// ks(3) with n over {8, 12} and k over {1, 10, 19, 28, 37}; m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34088
// cm_stride(7) with k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 1];
// iterations(1) for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 1; ++mc) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34109
// Full 1x4 tile with ks(3) and a_offset(43), k over {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(kc)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
34126
// Full 1x4 tile with ks(3) and a_offset(43); zero_index takes the single
// value 0 for every k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(kc)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34146
// Single run at m=1, n=4, k=8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
34160
// Single run at m=1, n=4, k=8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
34174
// Single run at m=1, n=4, k=8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
34188 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
34189
34190
34191 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at m=2, n=4, k=8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
34204
// Single run at m=2, n=4, k=8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
34218
// k fixed at 8; n over [1, 4] and m over [1, 2], iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34236
// k fixed at 8 and n at 4; m over [1, 2], iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
34252
// k fixed at 8 and m at 2; n over [1, 4], iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
34268
// Full 2x4 tile with k over [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
34283
// k over [1, 7], n over [1, 4] and m over [1, 2]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34303
// Full 2x4 tile with k over [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
34318
// k over [9, 15], n over [1, 4] and m over [1, 2]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34338
// Full 2x4 tile with k over 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
34353
// k over 16, 24, ..., 80, n over [1, 4] and m over [1, 2]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34373
// n over [5, 7] and k over {1, 10, 19, 28, 37}; m fixed at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34390
// cn_stride(7) with n over [5, 7] and k over {1, 10, 19, 28, 37}; m fixed at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34408
// n over [5, 7], k over {1, 10, 19, 28, 37} and m over [1, 2]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34428
// n over {8, 12} and k over {1, 10, 19, 28, 37}; m fixed at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34445
// cn_stride(7) with n over {8, 12} and k over {1, 10, 19, 28, 37}; m fixed at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34463
// n over {8, 12}, k over {1, 10, 19, 28, 37} and m over [1, 2]; iterations(1) each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34483
// Full 2x4 tile with ks(3), k over {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
34499
// ks(3) with k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 2];
// iterations(1) for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34520
// ks(3) with n over [5, 7] and k over {1, 10, 19, 28, 37}; m fixed at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34538
// ks(3) with n over {8, 12} and k over {1, 10, 19, 28, 37}; m fixed at 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE2;
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34556
// cm_stride(7) with k over {1, 10, 19, 28, 37}, n over [1, 4] and m over [1, 2];
// iterations(1) for each combination.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
                xnn_init_qs8_conv_minmax_fp32_sse2_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
34577
// Full 2x4 tile with ks(3) and a_offset(83), k over {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
  }
}
34594
// Full 2x4 tile with ks(3) and a_offset(83); zero_index takes each value in
// [0, 1] for every k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, zero) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
              xnn_init_qs8_conv_minmax_fp32_sse2_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
34614
// Single run at m=2, n=4, k=8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmin) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
34628
// Single run at m=2, n=4, k=8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, qmax) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
34642
// Single run at m=2, n=4, k=8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__SSE2_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE2;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
}
34656 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
34657
34658
34659 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c8 SSSE3 ld128 IGEMM: full 3x4 tile with k equal to 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
34672
// 3x4c8 SSSE3 ld128 IGEMM: full 3x4 tile at k=8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
34686
// 3x4c8 SSSE3 ld128 IGEMM: k=8 over every (m, n) with m in 1..3 and n in 1..4,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    for (uint32_t rows = 1; rows < 4; ++rows) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34704
// 3x4c8 SSSE3 ld128 IGEMM: k=8, n=4, with m swept over 1..3 (one iteration each).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t rows = 1; rows < 4; ++rows) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34720
// 3x4c8 SSSE3 ld128 IGEMM: k=8, m=3, with n swept over 1..4 (one iteration each).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34736
// 3x4c8 SSSE3 ld128 IGEMM: full 3x4 tile with k swept over 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34751
// 3x4c8 SSSE3 ld128 IGEMM: k in 1..7 combined with every (m, n), m in 1..3 and
// n in 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34771
// 3x4c8 SSSE3 ld128 IGEMM: full 3x4 tile with k swept over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34786
// 3x4c8 SSSE3 ld128 IGEMM: k in 9..15 combined with every (m, n), m in 1..3 and
// n in 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34806
// 3x4c8 SSSE3 ld128 IGEMM: full 3x4 tile with k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t depth = 16; depth < 81; depth += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34821
// 3x4c8 SSSE3 ld128 IGEMM: k = 16, 24, ..., 80 combined with every (m, n),
// m in 1..3 and n in 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t depth = 16; depth < 81; depth += 8) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34841
// 3x4c8 SSSE3 ld128 IGEMM: m=3 with n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34858
// 3x4c8 SSSE3 ld128 IGEMM: m=3, n in 5..7, k in {1, 10, 19, 28, 37}, with
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34876
// 3x4c8 SSSE3 ld128 IGEMM: n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..3;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34896
// 3x4c8 SSSE3 ld128 IGEMM: m=3 with n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34913
// 3x4c8 SSSE3 ld128 IGEMM: m=3, n in {8, 12}, k in {1, 10, 19, 28, 37}, with
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
34931
// 3x4c8 SSSE3 ld128 IGEMM: n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..3;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34951
// 3x4c8 SSSE3 ld128 IGEMM: full 3x4 tile with ks=3 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t depth = 1; depth < 41; depth += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
34967
// 3x4c8 SSSE3 ld128 IGEMM: ks=3, k in {1, 10, 19, 28, 37}, every (m, n) with
// m in 1..3 and n in 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
34988
// 3x4c8 SSSE3 ld128 IGEMM: ks=3, m=3, n in 5..7, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35006
// 3x4c8 SSSE3 ld128 IGEMM: ks=3, m=3, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSSE3;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35024
// 3x4c8 SSSE3 ld128 IGEMM: cm_stride=7, k in {1, 10, 19, 28, 37}, every (m, n)
// with m in 1..3 and n in 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35045
// 3x4c8 SSSE3 ld128 IGEMM: full 3x4 tile with ks=3 and a_offset=127 for
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, a_offset) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t depth = 1; depth < 41; depth += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse2_params,
        xnn_qs8_requantize_fp32);
  }
}
35062
// 3x4c8 SSSE3 ld128 IGEMM: full 3x4 tile with ks=3 and a_offset=127 for
// k in {1, 10, 19, 28, 37}, with zero_index swept over 0..2.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, zero) {
  TEST_REQUIRES_X86_SSSE3;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(depth)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35082
// 3x4c8 SSSE3 ld128 IGEMM: full 3x4 tile at k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, qmin) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
35096
// 3x4c8 SSSE3 ld128 IGEMM: full 3x4 tile at k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, qmax) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
35110
// 3x4c8 SSSE3 ld128 IGEMM: full 3x4 tile at k=8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSSE3_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSSE3;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse2_params,
      xnn_qs8_requantize_fp32);
}
35124 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
35125
35126
35127 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x4c8 SSE4.1 ld128 IGEMM: full 3x4 tile with k equal to 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35140
// 3x4c8 SSE4.1 ld128 IGEMM: full 3x4 tile at k=8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35154
// 3x4c8 SSE4.1 ld128 IGEMM: k=8 over every (m, n) with m in 1..3 and n in 1..4,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    for (uint32_t rows = 1; rows < 4; ++rows) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35172
// 3x4c8 SSE4.1 ld128 IGEMM: k=8, n=4, with m swept over 1..3 (one iteration each).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t rows = 1; rows < 4; ++rows) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35188
// 3x4c8 SSE4.1 ld128 IGEMM: k=8, m=3, with n swept over 1..4 (one iteration each).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35204
// 3x4c8 SSE4.1 ld128 IGEMM: full 3x4 tile with k swept over 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35219
// 3x4c8 SSE4.1 ld128 IGEMM: k in 1..7 combined with every (m, n), m in 1..3 and
// n in 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35239
// 3x4c8 SSE4.1 ld128 IGEMM: full 3x4 tile with k swept over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth <= 15; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35254
// 3x4c8 SSE4.1 ld128 IGEMM: k in 9..15 combined with every (m, n), m in 1..3 and
// n in 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 9; depth <= 15; ++depth) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35274
// 3x4c8 SSE4.1 ld128 IGEMM: full 3x4 tile with k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth < 81; depth += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35289
// 3x4c8 SSE4.1 ld128 IGEMM: k = 16, 24, ..., 80 combined with every (m, n),
// m in 1..3 and n in 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 16; depth < 81; depth += 8) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35309
// 3x4c8 SSE4.1 ld128 IGEMM: m=3 with n in 5..7 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35326
// 3x4c8 SSE4.1 ld128 IGEMM: m=3, n in 5..7, k in {1, 10, 19, 28, 37}, with
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35344
// 3x4c8 SSE4.1 ld128 IGEMM: n in 5..7, k in {1, 10, 19, 28, 37}, m in 1..3;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35364
// 3x4c8 SSE4.1 ld128 IGEMM: m=3 with n in {8, 12} and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35381
// 3x4c8 SSE4.1 ld128 IGEMM: m=3, n in {8, 12}, k in {1, 10, 19, 28, 37}, with
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35399
// 3x4c8 SSE4.1 ld128 IGEMM: n in {8, 12}, k in {1, 10, 19, 28, 37}, m in 1..3;
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35419
// 3x4c8 SSE4.1 ld128 IGEMM: full 3x4 tile with ks=3 and k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 41; depth += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35435
// 3x4c8 SSE4.1 ld128 IGEMM: ks=3, k in {1, 10, 19, 28, 37}, every (m, n) with
// m in 1..3 and n in 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35456
// 3x4c8 SSE4.1 ld128 IGEMM: ks=3, m=3, n in 5..7, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 5; cols <= 7; ++cols) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35474
// 3x4c8 SSE4.1 ld128 IGEMM: ks=3, m=3, n in {8, 12}, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_SSE41;
  for (uint32_t cols = 8; cols < 13; cols += 4) {
    for (size_t depth = 1; depth < 41; depth += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35492
// 3x4c8 SSE4.1 ld128 IGEMM: cm_stride=7, k in {1, 10, 19, 28, 37}, every (m, n)
// with m in 1..3 and n in 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 4; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35513
// 3x4c8 SSE4.1 ld128 IGEMM: full 3x4 tile with ks=3 and a_offset=127 for
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, a_offset) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 41; depth += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(depth)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35530
// 3x4c8 SSE4.1 ld128 IGEMM: full 3x4 tile with ks=3 and a_offset=127 for
// k in {1, 10, 19, 28, 37}, with zero_index swept over 0..2.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, zero) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t depth = 1; depth < 41; depth += 9) {
    for (uint32_t zero_idx = 0; zero_idx <= 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(depth)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35550
// 3x4c8 SSE4.1 ld128 IGEMM: full 3x4 tile at k=8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmin) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35564
// 3x4c8 SSE4.1 ld128 IGEMM: full 3x4 tile at k=8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, qmax) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35578
// 3x4c8 SSE4.1 ld128 IGEMM: full 3x4 tile at k=8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__SSE41_LD128, strided_cm) {
  TEST_REQUIRES_X86_SSE41;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35592 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
35593
35594
35595 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x4c8 AVX ld128 IGEMM: full 1x4 tile with k equal to 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35608
// 1x4c8 AVX ld128 IGEMM: full 1x4 tile at k=8 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
35622
// 1x4c8 AVX ld128 IGEMM: k=8 over every (m, n) with m = 1 and n in 1..4,
// one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    for (uint32_t rows = 1; rows < 2; ++rows) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(rows).n(cols).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
35640
// 1x4c8 AVX ld128 IGEMM: k=8, n=4, with the m loop covering only m = 1
// (one iteration).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t rows = 1; rows < 2; ++rows) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(rows).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35656
// 1x4c8 AVX ld128 IGEMM: k=8, m=1, with n swept over 1..4 (one iteration each).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t cols = 1; cols < 5; ++cols) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(cols).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35672
// 1x4c8 AVX ld128 IGEMM: full 1x4 tile with k swept over 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 7; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
35687
// 1x4c8 AVX ld128 IGEMM: k in 1..7 combined with every (m, n), m = 1 and
// n in 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t depth = 1; depth <= 7; ++depth) {
    for (uint32_t cols = 1; cols < 5; ++cols) {
      for (uint32_t rows = 1; rows < 2; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
35707
// m = 1, n = 4; sweeps k over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35722
// Sweeps k over 9..15, n over 1..4 and m over 1..1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35742
// m = 1, n = 4; sweeps k over the multiples of 8 from 16 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35757
// Sweeps k over multiples of 8 in 16..80, n over 1..4 and m over 1..1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35777
// m = 1; sweeps n over 5..7 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35794
// m = 1, cn_stride = 7; sweeps n over 5..7 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35812
// Sweeps n over 5..7, k over 1..40 in steps of 9 and m over 1..1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35832
// m = 1; sweeps n over {8, 12} and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35849
// m = 1, cn_stride = 7; sweeps n over {8, 12} and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35867
// Sweeps n over {8, 12}, k over 1..40 in steps of 9 and m over 1..1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35887
// m = 1, n = 4, ks = 3; sweeps k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35903
// ks = 3; sweeps k over 1..40 in steps of 9, n over 1..4 and m over 1..1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35924
// m = 1, ks = 3; sweeps n over 5..7 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35942
// m = 1, ks = 3; sweeps n over {8, 12} and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
35960
// cm_stride = 7; sweeps k over 1..40 in steps of 9, n over 1..4 and m over 1..1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
35981
// m = 1, n = 4, ks = 3, a_offset = 43; sweeps k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(k_size)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
35998
// ks = 3, a_offset = 43; sweeps k over 1..40 in steps of 9 and zero_index over 0..0.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_size)
        .ks(3)
        .a_offset(43)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36018
// Single run with m = 1, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36032
// Single run with m = 1, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36046
// Single run with m = 1, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(8).sr(1)
    .m(1).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36060 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
36061
36062
36063 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = mr = 2, n = nr = 4 and k = kr = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36076
// Single run with m = 2, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cn) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36090
// k = 8; sweeps n over 1..4 and m over 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36108
// k = 8, n = 4; sweeps m over 1..2 with a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36124
// k = 8, m = 2; sweeps n over 1..4 with a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36140
// m = 2, n = 4; sweeps k over 1..7 (every k below kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36155
// Sweeps k over 1..7, n over 1..4 and m over 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36175
// m = 2, n = 4; sweeps k over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36190
// Sweeps k over 9..15, n over 1..4 and m over 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36210
// m = 2, n = 4; sweeps k over the multiples of 8 from 16 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36225
// Sweeps k over multiples of 8 in 16..80, n over 1..4 and m over 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36245
// m = 2; sweeps n over 5..7 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36262
// m = 2, cn_stride = 7; sweeps n over 5..7 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36280
// Sweeps n over 5..7, k over 1..40 in steps of 9 and m over 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36300
// m = 2; sweeps n over {8, 12} and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36317
// m = 2, cn_stride = 7; sweeps n over {8, 12} and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36335
// Sweeps n over {8, 12}, k over 1..40 in steps of 9 and m over 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36355
// m = 2, n = 4, ks = 3; sweeps k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36371
// ks = 3; sweeps k over 1..40 in steps of 9, n over 1..4 and m over 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36392
// m = 2, ks = 3; sweeps n over 5..7 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36410
// m = 2, ks = 3; sweeps n over {8, 12} and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_AVX;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36428
// cm_stride = 7; sweeps k over 1..40 in steps of 9, n over 1..4 and m over 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36449
// m = 2, n = 4, ks = 3, a_offset = 83; sweeps k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, a_offset) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_size)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36466
// ks = 3, a_offset = 83; sweeps k over 1..40 in steps of 9 and zero_index over 0..1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, zero) {
  TEST_REQUIRES_X86_AVX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3)
        .a_offset(83)
        .zero_index(zero_idx)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36486
// Single run with m = 2, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmin) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36500
// Single run with m = 2, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, qmax) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36514
// Single run with m = 2, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__AVX_LD128, strided_cm) {
  TEST_REQUIRES_X86_AVX;
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36528 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
36529
36530
36531 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m = mr = 3, n = nr = 4 and k = kr = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36544
// Single run with m = 3, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cn) {
  TEST_REQUIRES_X86_XOP;
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
}
36558
// k = 8; sweeps n over 1..4 and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36576
// k = 8, n = 4; sweeps m over 1..3 with a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(m_size).n(4).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36592
// k = 8, m = 3; sweeps n over 1..4 with a single iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(n_size).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36608
// m = 3, n = 4; sweeps k over 1..7 (every k below kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36623
// Sweeps k over 1..7, n over 1..4 and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_lt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36643
// m = 3, n = 4; sweeps k over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36658
// Sweeps k over 9..15, n over 1..4 and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_gt_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36678
// m = 3, n = 4; sweeps k over the multiples of 8 from 16 to 80.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(k_size)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
  }
}
36693
// Sweeps k over multiples of 8 in 16..80, n over 1..4 and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, k_div_8_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36713
// m = 3; sweeps n over 5..7 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36730
// m = 3, cn_stride = 7; sweeps n over 5..7 and k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(n_size).k(k_size)
        .cn_stride(7)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
    }
  }
}
36748
// Sweeps n over 5..7, k over 1..40 in steps of 9 and m over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_fp32_sse4_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
36768
// 3x4c8 XOP LD128 kernel: n = 8 and 12 (multiples of nr = 4), k = 1..40 in steps of 9, m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36785
// 3x4c8 XOP LD128 kernel: n = 8 and 12, k = 1..40 in steps of 9, m fixed at 3, cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_strided_cn) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36803
// 3x4c8 XOP LD128 kernel: n = 8 and 12, k = 1..40 in steps of 9, every m in 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36823
// 3x4c8 XOP LD128 kernel: full 3x4 tile with ks = 3, k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
36839
// 3x4c8 XOP LD128 kernel: ks = 3, k = 1..40 in steps of 9, every m in 1..3 and n in 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, small_kernel_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36860
// 3x4c8 XOP LD128 kernel: n = 5..7, k = 1..40 in steps of 9, m fixed at 3, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_gt_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36878
// 3x4c8 XOP LD128 kernel: n = 8 and 12, k = 1..40 in steps of 9, m fixed at 3, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, n_div_4_small_kernel) {
  TEST_REQUIRES_X86_XOP;
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36896
// 3x4c8 XOP LD128 kernel: cm_stride = 7, k = 1..40 in steps of 9, every m in 1..3 and n in 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm_subtile) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(4).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
            xnn_init_qs8_conv_minmax_fp32_sse4_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
36917
// 3x4c8 XOP LD128 kernel: full 3x4 tile with ks = 3 and a_offset = 127, k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, a_offset) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(4).kr(8).sr(1);
    tester.m(3).n(4).k(k_size);
    tester.ks(3).a_offset(127);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
        xnn_init_qs8_conv_minmax_fp32_sse4_params,
        xnn_qs8_requantize_fp32);
  }
}
36934
// 3x4c8 XOP LD128 kernel: ks = 3, a_offset = 127, zero_index swept over 0..2, k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, zero) {
  TEST_REQUIRES_X86_XOP;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t mz_index = 0; mz_index < 3; ++mz_index) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(4).kr(8).sr(1);
      tester.m(3).n(4).k(k_size);
      tester.ks(3).a_offset(127).zero_index(mz_index);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
          xnn_init_qs8_conv_minmax_fp32_sse4_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
36954
// 3x4c8 XOP LD128 kernel: single 3x4 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmin) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
36968
// 3x4c8 XOP LD128 kernel: single 3x4 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, qmax) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
36982
// 3x4c8 XOP LD128 kernel: single 3x4 tile at k = 8 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__XOP_LD128, strided_cm) {
  TEST_REQUIRES_X86_XOP;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(4).kr(8).sr(1);
  tester.m(3).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
      xnn_init_qs8_conv_minmax_fp32_sse4_params,
      xnn_qs8_requantize_fp32);
}
36996 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
36997
36998
36999 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 1x8c8 AVX2 kernel: single 1x8 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
37012
// 1x8c8 AVX2 kernel: single 1x8 tile at k = 8 with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.cn_stride(11);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
37026
// 1x8c8 AVX2 kernel: k = 8, every n in 1..8 and m in 1..1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8).iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37044
// 1x8c8 AVX2 kernel: k = 8, n = 8, m swept over 1..1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(m_size).n(8).k(8).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
37060
// 1x8c8 AVX2 kernel: k = 8, m = 1, n swept over 1..8, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(n_size).k(8).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
37076
// 1x8c8 AVX2 kernel: full 1x8 tile, k = 1..7 (below kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
37091
// 1x8c8 AVX2 kernel: k = 1..7, every n in 1..8 and m in 1..1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37111
// 1x8c8 AVX2 kernel: full 1x8 tile, k = 9..15 (above kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
37126
// 1x8c8 AVX2 kernel: k = 9..15, every n in 1..8 and m in 1..1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37146
// 1x8c8 AVX2 kernel: full 1x8 tile, k = 16..80 in steps of 8.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
37161
// 1x8c8 AVX2 kernel: k = 16..80 in steps of 8, every n in 1..8 and m in 1..1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37181
// 1x8c8 AVX2 kernel: n = 9..15 (above nr = 8), k = 1..40 in steps of 9, m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37198
// 1x8c8 AVX2 kernel: n = 9..15, k = 1..40 in steps of 9, m fixed at 1, cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37216
// 1x8c8 AVX2 kernel: n = 9..15, k = 1..40 in steps of 9, m swept over 1..1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37236
// 1x8c8 AVX2 kernel: n = 16 and 24 (multiples of nr = 8), k = 1..40 in steps of 9, m fixed at 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37253
// 1x8c8 AVX2 kernel: n = 16 and 24, k = 1..40 in steps of 9, m fixed at 1, cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.cn_stride(11);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37271
// 1x8c8 AVX2 kernel: n = 16 and 24, k = 1..40 in steps of 9, m swept over 1..1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37291
// 1x8c8 AVX2 kernel: full 1x8 tile with ks = 3, k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
37307
// 1x8c8 AVX2 kernel: ks = 3, k = 1..40 in steps of 9, every n in 1..8 and m in 1..1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37328
// 1x8c8 AVX2 kernel: n = 9..15, k = 1..40 in steps of 9, m fixed at 1, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37346
// 1x8c8 AVX2 kernel: n = 16 and 24, k = 1..40 in steps of 9, m fixed at 1, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_small_kernel) {
  TEST_REQUIRES_X86_AVX2;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37364
// 1x8c8 AVX2 kernel: cm_stride = 11, k = 1..40 in steps of 9, every n in 1..8 and m in 1..1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(1).nr(8).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(11).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
            xnn_init_qs8_conv_minmax_fp32_avx2_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37385
// 1x8c8 AVX2 kernel: full 1x8 tile with ks = 3 and a_offset = 43, k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, a_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(1).nr(8).kr(8).sr(1);
    tester.m(1).n(8).k(k_size);
    tester.ks(3).a_offset(43);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
        xnn_init_qs8_conv_minmax_fp32_avx2_params,
        xnn_qs8_requantize_fp32);
  }
}
37402
// 1x8c8 AVX2 kernel: ks = 3, a_offset = 43, zero_index swept over 0..0, k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t mz_index = 0; mz_index < 1; ++mz_index) {
      auto tester = GemmMicrokernelTester();
      tester.mr(1).nr(8).kr(8).sr(1);
      tester.m(1).n(8).k(k_size);
      tester.ks(3).a_offset(43).zero_index(mz_index);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
          xnn_init_qs8_conv_minmax_fp32_avx2_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37422
// 1x8c8 AVX2 kernel: single 1x8 tile at k = 8 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmin) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
37436
// 1x8c8 AVX2 kernel: single 1x8 tile at k = 8 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmax) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
37450
// 1x8c8 AVX2 kernel: single 1x8 tile at k = 8 with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm) {
  TEST_REQUIRES_X86_AVX2;
  auto tester = GemmMicrokernelTester();
  tester.mr(1).nr(8).kr(8).sr(1);
  tester.m(1).n(8).k(8);
  tester.cm_stride(11);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
      xnn_init_qs8_conv_minmax_fp32_avx2_params,
      xnn_qs8_requantize_fp32);
}
37464 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
37465
37466
37467 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// 3x16c8 AVX512SKX kernel: single 3x16 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(8).sr(1);
  tester.m(3).n(16).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qs8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
37480
// 3x16c8 AVX512SKX kernel: single 3x16 tile at k = 8 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(8).sr(1);
  tester.m(3).n(16).k(8);
  tester.cn_stride(19);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qs8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
37494
// 3x16c8 AVX512SKX kernel: k = 8, every n in 1..16 and m in 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(m_size).n(n_size).k(8).iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37512
// 3x16c8 AVX512SKX kernel: k = 8, n = 16, m swept over 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_m) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(m_size).n(16).k(8).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
37528
// 3x16c8 AVX512SKX kernel: k = 8, m = 3, n swept over 1..16, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_eq_8_subtile_n) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(n_size).k(8).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
37544
// 3x16c8 AVX512SKX kernel: full 3x16 tile, k = 1..7 (below kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(16).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
37559
// 3x16c8 AVX512SKX kernel: k = 1..7, every n in 1..16 and m in 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_lt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37579
// 3x16c8 AVX512SKX kernel: full 3x16 tile, k = 9..15 (above kr = 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(16).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
37594
// 3x16c8 AVX512SKX kernel: k = 9..15, every n in 1..16 and m in 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_gt_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37614
// 3x16c8 AVX512SKX kernel: full 3x16 tile, k = 16..80 in steps of 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(16).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
37629
// 3x16c8 AVX512SKX kernel: k = 16..80 in steps of 8, every n in 1..16 and m in 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, k_div_8_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37649
// 3x16c8 AVX512SKX kernel: n = 17..31 (above nr = 16), k = 1..40 in steps of 9, m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37666
// 3x16c8 AVX512SKX kernel: n = 17..31, k = 1..40 in steps of 9, m fixed at 3, cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(19);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37684
// 3x16c8 AVX512SKX kernel: n = 17..31, k = 1..40 in steps of 9, every m in 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37704
// 3x16c8 AVX512SKX kernel: n = 32 and 48 (multiples of nr = 16), k = 1..40 in steps of 9, m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37721
// 3x16c8 AVX512SKX kernel: n = 32 and 48, k = 1..40 in steps of 9, m fixed at 3, cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_strided_cn) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.cn_stride(19);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37739
// 3x16c8 AVX512SKX kernel: n = 32 and 48, k = 1..40 in steps of 9, every m in 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37759
// 3x16c8 AVX512SKX kernel: full 3x16 tile with ks = 3, k = 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(16).k(k_size);
    tester.ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
37775
// 3x16c8 AVX512SKX kernel: ks = 3, k = 1..40 in steps of 9, every n in 1..16 and m in 1..3, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, small_kernel_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37796
// 3x16c8 AVX512SKX kernel: n = 17..31, k = 1..40 in steps of 9, m fixed at 3, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_gt_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37814
// 3x16c8 AVX512SKX kernel: n = 32 and 48, k = 1..40 in steps of 9, m fixed at 3, ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, n_div_16_small_kernel) {
  TEST_REQUIRES_X86_AVX512SKX;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(n_size).k(k_size);
      tester.ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37832
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm_subtile) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Every (k, n, m) with k in {1, 10, 19, 28, 37}, n in [1, 16], m in [1, 3],
  // using cm_stride(19) and iterations(1).
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 16; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 3; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(19).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
            xnn_init_qs8_conv_minmax_fp32_avx512_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
37853
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, a_offset) {
  TEST_REQUIRES_X86_AVX512SKX;
  // k sweeps 1, 10, 19, 28, 37 with m == mr, n == nr, ks(3) and a_offset(127).
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(16).k(k_val).ks(3);
    tester.a_offset(127);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
        xnn_init_qs8_conv_minmax_fp32_avx512_params,
        xnn_qs8_requantize_fp32);
  }
}
37870
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, zero) {
  TEST_REQUIRES_X86_AVX512SKX;
  // For each k in {1, 10, 19, 28, 37}, pass every index in [0, 3) to
  // zero_index, combined with ks(3) and a_offset(127).
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(16).k(k_val).ks(3);
      tester.a_offset(127).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
          xnn_init_qs8_conv_minmax_fp32_avx512_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37890
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmin) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Single case: m == mr, n == nr, k == 8, with qmin(128).
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(8).sr(1);
  tester.m(3).n(16).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qs8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
37904
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, qmax) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Single case: m == mr, n == nr, k == 8, with qmax(128).
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(8).sr(1);
  tester.m(3).n(16).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qs8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
37918
TEST(QS8_IGEMM_MINMAX_FP32_3X16C8__AVX512SKX, strided_cm) {
  TEST_REQUIRES_X86_AVX512SKX;
  // Single case: m == mr, n == nr, k == 8, with cm_stride(19).
  GemmMicrokernelTester tester{};
  tester.mr(3).nr(16).kr(8).sr(1);
  tester.m(3).n(16).k(8);
  tester.cm_stride(19);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
      xnn_init_qs8_conv_minmax_fp32_avx512_params,
      xnn_qs8_requantize_fp32);
}
37932 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
37933
37934
37935 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  // Single case: m == mr, n == nr, k == 8, no other overrides.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
37947
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  // Single case: m == mr, n == nr, k == 8, with cn_stride(7).
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
37960
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  // k == 8 for every n in [1, 4] and m in [1, 1]; iterations(1) per case.
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(m_val).n(n_val).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
37977
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  // k == 8 and n == nr while m covers [1, 1]; iterations(1) per case.
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(m_val).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
37992
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  // k == 8 and m == mr while n covers [1, 4]; iterations(1) per case.
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(n_val).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38007
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  // k covers 1..7 with m == mr and n == nr.
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38021
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  // k covers 1..7; for each, every n in [1, 4] and m in [1, 1] runs with
  // iterations(1).
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38040
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  // k covers 9..15 with m == mr and n == nr.
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38054
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  // k covers 9..15; for each, every n in [1, 4] and m in [1, 1] runs with
  // iterations(1).
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38073
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  // k steps through the multiples of 8 from 16 to 80 with m == mr, n == nr.
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38087
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  // k steps through the multiples of 8 from 16 to 80; for each, every n in
  // [1, 4] and m in [1, 1] runs with iterations(1).
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38106
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  // n covers 5..7 (above nr == 4); k sweeps 1, 10, 19, 28, 37.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38122
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  // n covers 5..7; k sweeps 1, 10, 19, 28, 37; cn_stride(7) throughout.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38139
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  // n covers 5..7; k sweeps 1, 10, 19, 28, 37; m covers [1, 1];
  // iterations(1) per case.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38158
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  // n takes the values 8 and 12 (multiples of nr == 4); k sweeps
  // 1, 10, 19, 28, 37.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38174
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  // n takes the values 8 and 12; k sweeps 1, 10, 19, 28, 37; cn_stride(7).
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38191
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  // n takes the values 8 and 12; k sweeps 1, 10, 19, 28, 37; m covers [1, 1];
  // iterations(1) per case.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38210
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  // Sweep k over 1, 10, 19, 28, 37 with m == mr, n == nr and ks(3).
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val).ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38225
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  // For every k in {1, 10, 19, 28, 37}, cover each n in [1, 4] and m in [1, 1]
  // with ks(3); iterations(1) per case.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val).ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38245
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  // n covers 5..7; k sweeps 1, 10, 19, 28, 37; ks(3) throughout.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38262
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  // n takes the values 8 and 12; k sweeps 1, 10, 19, 28, 37; ks(3).
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38279
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  // Every (k, n, m) with k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 1],
  // using cm_stride(7) and iterations(1).
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38299
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  // k sweeps 1, 10, 19, 28, 37 with m == mr, n == nr, ks(3) and a_offset(43).
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val).ks(3);
    tester.a_offset(43);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38315
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  // For each k in {1, 10, 19, 28, 37}, pass every index in [0, 1) to
  // zero_index, combined with ks(3) and a_offset(43).
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(4).k(k_val).ks(3);
      tester.a_offset(43).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38334
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  // Single case: m == mr, n == nr, k == 8, with qmin(128).
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38347
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  // Single case: m == mr, n == nr, k == 8, with qmax(128).
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38360
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  // Single case: m == mr, n == nr, k == 8, with cm_stride(7).
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38373 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
38374
38375
38376 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  // Single case: m == mr, n == nr, k == 8, no other overrides.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38388
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  // Single case: m == mr, n == nr, k == 8, with cn_stride(7).
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38401
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  // k == 8 for every n in [1, 4] and m in [1, 1]; iterations(1) per case.
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(m_val).n(n_val).k(8);
      tester.iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38418
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  // k == 8 and n == nr while m covers [1, 1]; iterations(1) per case.
  for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(m_val).n(4).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38433
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  // k == 8 and m == mr while n covers [1, 4]; iterations(1) per case.
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(n_val).k(8);
    tester.iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38448
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  // k covers 1..7 with m == mr and n == nr.
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38462
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  // k covers 1..7; for each, every n in [1, 4] and m in [1, 1] runs with
  // iterations(1).
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38481
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  // k covers 9..15 with m == mr and n == nr.
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38495
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  // k covers 9..15; for each, every n in [1, 4] and m in [1, 1] runs with
  // iterations(1).
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38514
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  // k steps through the multiples of 8 from 16 to 80 with m == mr, n == nr.
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38528
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  // k steps through the multiples of 8 from 16 to 80; for each, every n in
  // [1, 4] and m in [1, 1] runs with iterations(1).
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38547
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  // n covers 5..7 (above nr == 4); k sweeps 1, 10, 19, 28, 37.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38563
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  // n covers 5..7; k sweeps 1, 10, 19, 28, 37; cn_stride(7) throughout.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38580
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  // n covers 5..7; k sweeps 1, 10, 19, 28, 37; m covers [1, 1];
  // iterations(1) per case.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38599
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  // n takes the values 8 and 12 (multiples of nr == 4); k sweeps
  // 1, 10, 19, 28, 37.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38615
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  // n takes the values 8 and 12; k sweeps 1, 10, 19, 28, 37; cn_stride(7).
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val);
      tester.cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38632
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  // n takes the values 8 and 12; k sweeps 1, 10, 19, 28, 37; m covers [1, 1];
  // iterations(1) per case.
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38651
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  // Sweep k over 1, 10, 19, 28, 37 with m == mr, n == nr and ks(3).
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val).ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38666
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  // For every k in {1, 10, 19, 28, 37}, cover each n in [1, 4] and m in [1, 1]
  // with ks(3); iterations(1) per case.
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val).ks(3);
        tester.iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38686
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  // n covers 5..7; k sweeps 1, 10, 19, 28, 37; ks(3) throughout.
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38703
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  // n takes the values 8 and 12; k sweeps 1, 10, 19, 28, 37; ks(3).
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38720
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  // Every (k, n, m) with k in {1, 10, 19, 28, 37}, n in [1, 4], m in [1, 1],
  // using cm_stride(7) and iterations(1).
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 1; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(4).kr(2).sr(1);
        tester.m(m_val).n(n_val).k(k_val);
        tester.cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
38740
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  // k sweeps 1, 10, 19, 28, 37 with m == mr, n == nr, ks(3) and a_offset(43).
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(4).kr(2).sr(1);
    tester.m(1).n(4).k(k_val).ks(3);
    tester.a_offset(43);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
38756
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  // For each k in {1, 10, 19, 28, 37}, pass every index in [0, 1) to
  // zero_index, combined with ks(3) and a_offset(43).
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(4).kr(2).sr(1);
      tester.m(1).n(4).k(k_val).ks(3);
      tester.a_offset(43).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
38775
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  // Single case: m == mr, n == nr, k == 8, with qmin(128).
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38788
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  // Single case: m == mr, n == nr, k == 8, with qmax(128).
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38801
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  // Single case: m == mr, n == nr, k == 8, with cm_stride(7).
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(4).kr(2).sr(1);
  tester.m(1).n(4).k(8);
  tester.cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
38814 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
38815
38816
38817 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// One run at m=1, n=4, k=8; no further tester options are set.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
38829
// One run at m=1, n=4, k=8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
38842
// k=8; sweeps n over 1..4 and m over 1..1, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
38859
// n=4, k=8; sweeps m over 1..1 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
38874
// m=1, k=8; sweeps n over 1..4 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
38889
// m=1, n=4; sweeps k over 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
38903
// Sweeps k over 1..7, n over 1..4 and m over 1..1, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
38922
// m=1, n=4; sweeps k over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
38936
// Sweeps k over 9..15, n over 1..4 and m over 1..1, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
38955
// m=1, n=4; sweeps k over 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
38969
// Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..1, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
38988
// m=1; sweeps n over 5..7 and k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39004
// m=1, cn_stride(7); sweeps n over 5..7 and k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39021
// Sweeps n over 5..7, k over 1, 10, ..., 37 and m over 1..1, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
39040
// m=1; sweeps n over 8 and 12, and k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39056
// m=1, cn_stride(7); sweeps n over 8 and 12, and k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39073
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over 1..1, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
39092
// m=1, n=4, ks(3); sweeps k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
39107
// ks(3); sweeps k over 1, 10, ..., 37, n over 1..4 and m over 1..1, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
39127
// m=1, ks(3); sweeps n over 5..7 and k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39144
// m=1, ks(3); sweeps n over 8 and 12, and k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39161
// cm_stride(7); sweeps k over 1, 10, ..., 37, n over 1..4 and m over 1..1, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(2).sr(4)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
39181
// m=1, n=4, ks(3), a_offset(43); sweeps k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(2).sr(4)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
39197
// m=1, n=4, ks(3), a_offset(43); sweeps k over 1, 10, ..., 37 and zero_index over 0..0.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(2).sr(4)
          .m(1).n(4).k(k_dim)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39216
// One run at m=1, n=4, k=8 with qmin(128) set on the tester.
// NOTE(review): presumably exercises the lower output clamp — confirm in GemmMicrokernelTester.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
39229
// One run at m=1, n=4, k=8 with qmax(128) set on the tester.
// NOTE(review): presumably exercises the upper output clamp — confirm in GemmMicrokernelTester.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
39242
// One run at m=1, n=4, k=8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(2).sr(4)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
39255 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
39256
39257
39258 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// One run at m=1, n=4, k=8; no further tester options are set.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
39270
// One run at m=1, n=4, k=8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
39283
// k=8; sweeps n over 1..4 and m over 1..1, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39300
// n=4, k=8; sweeps m over 1..1 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
39315
// m=1, k=8; sweeps n over 1..4 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
39330
// m=1, n=4; sweeps k over 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
39344
// Sweeps k over 1..7, n over 1..4 and m over 1..1, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
39363
// m=1, n=4; sweeps k over 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
39377
// Sweeps k over 9..15, n over 1..4 and m over 1..1, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_dim = 9; k_dim < 16; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
39396
// m=1, n=4; sweeps k over 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
39410
// Sweeps k over 16, 24, ..., 80, n over 1..4 and m over 1..1, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
39429
// m=1; sweeps n over 5..7 and k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39445
// m=1, cn_stride(7); sweeps n over 5..7 and k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39462
// Sweeps n over 5..7, k over 1, 10, ..., 37 and m over 1..1, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
39481
// m=1; sweeps n over 8 and 12, and k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39497
// m=1, cn_stride(7); sweeps n over 8 and 12, and k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39514
// Sweeps n over 8 and 12, k over 1, 10, ..., 37 and m over 1..1, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
39533
// m=1, n=4, ks(3); sweeps k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
39548
// ks(3); sweeps k over 1, 10, ..., 37, n over 1..4 and m over 1..1, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
39568
// m=1, ks(3); sweeps n over 5..7 and k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39585
// m=1, ks(3); sweeps n over 8 and 12, and k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(n_dim).k(k_dim)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39602
// cm_stride(7); sweeps k over 1, 10, ..., 37, n over 1..4 and m over 1..1, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
            .mr(1).nr(4).kr(8).sr(1)
            .m(m_dim).n(n_dim).k(k_dim)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
39622
// m=1, n=4, ks(3), a_offset(43); sweeps k over 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
        .mr(1).nr(4).kr(8).sr(1)
        .m(1).n(4).k(k_dim)
        .ks(3)
        .a_offset(43)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
39638
// m=1, n=4, ks(3), a_offset(43); sweeps k over 1, 10, ..., 37 and zero_index over 0..0.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(1).nr(4).kr(8).sr(1)
          .m(1).n(4).k(k_dim)
          .ks(3)
          .a_offset(43)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39657
// One run at m=1, n=4, k=8 with qmin(128) set on the tester.
// NOTE(review): presumably exercises the lower output clamp — confirm in GemmMicrokernelTester.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
39670
// One run at m=1, n=4, k=8 with qmax(128) set on the tester.
// NOTE(review): presumably exercises the upper output clamp — confirm in GemmMicrokernelTester.
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
39683
// One run at m=1, n=4, k=8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_1X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
      .mr(1).nr(4).kr(8).sr(1)
      .m(1).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
39696 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
39697
39698
39699 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// One run at m=2, n=4, k=8; no further tester options are set.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
39711
// One run at m=2, n=4, k=8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
39724
// k=8; sweeps n over 1..4 and m over 1..2, with iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39741
// n=4, k=8; sweeps m over 1..2 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(m_dim).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
39756
// m=2, k=8; sweeps n over 1..4 with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(n_dim).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
39771
// m=2, n=4; sweeps k over 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_dim = 1; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)
        .m(2).n(4).k(k_dim)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
39785
// Sweeps k over 1..7 for every (m, n) pair with m in 1..2 and n in 1..4;
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
39804
// Sweeps k over 9..15 with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
        .m(2).n(4).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
39818
// Sweeps k over 9..15 for every (m, n) pair with m in 1..2 and n in 1..4;
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
39837
// Sweeps k over the multiples of 8 from 16 through 80 with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
        .m(2).n(4).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
39851
// Sweeps k over the multiples of 8 from 16 through 80 for every (m, n) pair
// with m in 1..2 and n in 1..4; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
39870
// Runs n in 5..7 against k in {1, 10, 19, 28, 37} with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
          .m(2).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39886
// Runs n in 5..7 against k in {1, 10, 19, 28, 37} with m = 2 and cn_stride
// set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
          .m(2).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39903
// Runs n in 5..7 against k in {1, 10, 19, 28, 37} for m in 1..2; iterations
// set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
39922
// Runs n in {8, 12} against k in {1, 10, 19, 28, 37} with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
          .m(2).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39938
// Runs n in {8, 12} against k in {1, 10, 19, 28, 37} with m = 2 and
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
          .m(2).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
39955
// Runs n in {8, 12} against k in {1, 10, 19, 28, 37} for m in 1..2;
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
39974
// Runs k in {1, 10, 19, 28, 37} with m = 2, n = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
        .m(2).n(4).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
39989
// Runs k in {1, 10, 19, 28, 37} for every (m, n) pair with m in 1..2 and
// n in 1..4, with ks set to 3 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40009
// Runs n in 5..7 against k in {1, 10, 19, 28, 37} with m = 2 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
          .m(2).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40026
// Runs n in {8, 12} against k in {1, 10, 19, 28, 37} with m = 2 and ks set
// to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
          .m(2).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40043
// Runs k in {1, 10, 19, 28, 37} for every (m, n) pair with m in 1..2 and
// n in 1..4, with cm_stride set to 7 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40063
// Runs k in {1, 10, 19, 28, 37} with m = 2, n = 4, ks set to 3 and a_offset
// set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
        .m(2).n(4).k(k)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
40079
// Runs k in {1, 10, 19, 28, 37} with zero_index taking each value in {0, 1};
// m = 2, n = 4, ks set to 3 and a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
          .m(2).n(4).k(k)
          .ks(3)
          .a_offset(83)
          .zero_index(mz)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40098
// Single run with m = 2, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
40111
// Single run with m = 2, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
40124
// Single run with m = 2, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(2).sr(1)  // mr/nr/kr mirror "2x4c2" in the kernel name
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
40137 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
40138
40139
40140 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = 2, n = 4 (equal to mr and nr) and k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
      .m(2).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
40152
// Single run with m = 2, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
40165
// Runs every (m, n) pair with m in 1..2 and n in 1..4 at k = 8, with
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40182
// Varies m over 1..2 while n stays at 4 and k at 8; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
        .m(m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
40197
// Varies n over 1..4 while m stays at 2 and k at 8; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
        .m(2).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
40212
// Sweeps k over 1..7 with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
        .m(2).n(4).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
40226
// Sweeps k over 1..7 for every (m, n) pair with m in 1..2 and n in 1..4;
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40245
// Sweeps k over 9..15 with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
        .m(2).n(4).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
40259
// Sweeps k over 9..15 for every (m, n) pair with m in 1..2 and n in 1..4;
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40278
// Sweeps k over the multiples of 8 from 16 through 80 with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
        .m(2).n(4).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
40292
// Sweeps k over the multiples of 8 from 16 through 80 for every (m, n) pair
// with m in 1..2 and n in 1..4; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40311
// Runs n in 5..7 against k in {1, 10, 19, 28, 37} with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
          .m(2).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40327
// Runs n in 5..7 against k in {1, 10, 19, 28, 37} with m = 2 and cn_stride
// set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
          .m(2).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40344
// Runs n in 5..7 against k in {1, 10, 19, 28, 37} for m in 1..2; iterations
// set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40363
// Runs n in {8, 12} against k in {1, 10, 19, 28, 37} with m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
          .m(2).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40379
// Runs n in {8, 12} against k in {1, 10, 19, 28, 37} with m = 2 and
// cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
          .m(2).n(n).k(k)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40396
// Runs n in {8, 12} against k in {1, 10, 19, 28, 37} for m in 1..2;
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40415
// Runs k in {1, 10, 19, 28, 37} with m = 2, n = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
        .m(2).n(4).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
40430
// Runs k in {1, 10, 19, 28, 37} for every (m, n) pair with m in 1..2 and
// n in 1..4, with ks set to 3 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40450
// Runs n in 5..7 against k in {1, 10, 19, 28, 37} with m = 2 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n = 5; n < 8; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
          .m(2).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40467
// Runs n in {8, 12} against k in {1, 10, 19, 28, 37} with m = 2 and ks set
// to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
          .m(2).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40484
// Runs k in {1, 10, 19, 28, 37} for every (m, n) pair with m in 1..2 and
// n in 1..4, with cm_stride set to 7 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
            .m(m).n(n).k(k)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40504
// Runs k in {1, 10, 19, 28, 37} with m = 2, n = 4, ks set to 3 and a_offset
// set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
        .m(2).n(4).k(k)
        .ks(3)
        .a_offset(83)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
40520
// Runs k in {1, 10, 19, 28, 37} with zero_index taking each value in {0, 1};
// m = 2, n = 4, ks set to 3 and a_offset set to 83.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; ++mz) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
          .m(2).n(4).k(k)
          .ks(3)
          .a_offset(83)
          .zero_index(mz)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40539
// Single run with m = 2, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
      .m(2).n(4).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
40552
// Single run with m = 2, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
      .m(2).n(4).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
40565
// Single run with m = 2, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
      .m(2).n(4).k(8)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
40578 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
40579
40580
40581 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = 2, n = 4 (equal to mr and nr) and k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
      .m(2).n(4).k(8)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
40593
// Single run with m = 2, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
      .m(2).n(4).k(8)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
}
40606
// Runs every (m, n) pair with m in 1..2 and n in 1..4 at k = 8, with
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n = 1; n <= 4; ++n) {
    for (uint32_t m = 1; m <= 2; ++m) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
                xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
40623
// Varies m over 1..2 while n stays at 4 and k at 8; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m = 1; m <= 2; ++m) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
        .m(m).n(4).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
40638
// Varies n over 1..4 while m stays at 2 and k at 8; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n = 1; n <= 4; ++n) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
        .m(2).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
40653
// Sweeps k over 1..7 with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
        .m(2).n(4).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
40667
// Sweeps k over 1..7 for every (m, n) pair with m in 1..2 and n in 1..4;
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40686
// Sweeps k over 9..15 with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
        .m(2).n(4).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
40700
// Sweeps k over 9..15 for every (m, n) pair with m in 1..2 and n in 1..4;
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40719
// Sweeps k over the multiples of 8 from 16 through 80 with m = 2 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
        .m(2).n(4).k(k)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
              xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
              xnn_qs8_requantize_fp32);
  }
}
40733
// Sweeps k over the multiples of 8 from 16 through 80 for every (m, n) pair
// with m in 1..2 and n in 1..4; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 4; ++n) {
      for (uint32_t m = 1; m <= 2; ++m) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(8).sr(1)  // mr/nr/kr mirror "2x4c8" in the kernel name
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
                  xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
40752
// Runs m = 2 with n in 5..7; for each n, k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40768
// Runs m = 2 with n in 5..7 and k in 1, 10, ..., 37, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40785
// For n in 5..7 and k in 1, 10, ..., 37, runs m = 1 and m = 2,
// with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40804
// Runs m = 2 with n = 8 and n = 12; for each n, k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40820
// Runs m = 2 with n = 8 and n = 12 and k in 1, 10, ..., 37, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40837
// For n = 8 and n = 12 and k in 1, 10, ..., 37, runs m = 1 and m = 2,
// with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40856
// Runs m = 2, n = 4 with ks set to 3 for k in 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40871
// For k in 1, 10, ..., 37, runs every (n, m) pair with n in 1..4 and m in 1..2,
// with ks set to 3 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40891
// Runs m = 2 with n in 5..7 and k in 1, 10, ..., 37, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40908
// Runs m = 2 with n = 8 and n = 12 and k in 1, 10, ..., 37, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40925
// For k in 1, 10, ..., 37, runs every (n, m) pair with n in 1..4 and m in 1..2,
// with cm_stride set to 7 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 3; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(8).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
40945
// Runs m = 2, n = 4 with ks set to 3 and a_offset set to 83 for k in 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(8).sr(1)
      .m(2).n(4).k(k_dim)
      .ks(3)
      .a_offset(83)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
40961
// For k in 1, 10, ..., 37, runs m = 2, n = 4 with ks = 3 and a_offset = 83,
// once for each zero_index value 0 and 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zi = 0; zi <= 1; ++zi) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(8).sr(1)
        .m(2).n(4).k(k_dim)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
40980
// Single run with m = 2, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
40993
// Single run with m = 2, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41006
// Single run with m = 2, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(8).sr(1)
    .m(2).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41019 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
41020
41021
41022 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = 3, n = 4, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41034
// Single run with m = 3, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41047
// With k = 8, runs every (n, m) pair with n in 1..4 and m in 1..3,
// with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41064
// With n = 4 and k = 8, runs m = 1, 2, 3 with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41079
// With m = 3 and k = 8, runs n = 1, 2, 3, 4 with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41094
// Runs m = 3, n = 4 for every k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41108
// For each k in 1..7, runs every (n, m) pair with n in 1..4 and m in 1..3,
// with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41127
// Runs m = 3, n = 4 for every k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41141
// For each k in 9..15, runs every (n, m) pair with n in 1..4 and m in 1..3,
// with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41160
// Runs m = 3, n = 4 for k = 16, 24, ..., 80 (step of 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41174
// For k = 16, 24, ..., 80, runs every (n, m) pair with n in 1..4 and m in 1..3,
// with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41193
// Runs m = 3 with n in 5..7; for each n, k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41209
// Runs m = 3 with n in 5..7 and k in 1, 10, ..., 37, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41226
// For n in 5..7 and k in 1, 10, ..., 37, runs m = 1, 2, 3,
// with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41245
// Runs m = 3 with n = 8 and n = 12; for each n, k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41261
// Runs m = 3 with n = 8 and n = 12 and k in 1, 10, ..., 37, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41278
// For n = 8 and n = 12 and k in 1, 10, ..., 37, runs m = 1, 2, 3,
// with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41297
// Runs m = 3, n = 4 with ks set to 3 for k in 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_dim)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41312
// For k in 1, 10, ..., 37, runs every (n, m) pair with n in 1..4 and m in 1..3,
// with ks set to 3 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41332
// Runs m = 3 with n in 5..7 and k in 1, 10, ..., 37, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41349
// Runs m = 3 with n = 8 and n = 12 and k in 1, 10, ..., 37, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41366
// For k in 1, 10, ..., 37, runs every (n, m) pair with n in 1..4 and m in 1..3,
// with cm_stride set to 7 and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41386
// Runs m = 3, n = 4 with ks set to 3 and a_offset set to 127 for k in 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_dim)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41402
// For k in 1, 10, ..., 37, runs m = 3, n = 4 with ks = 3 and a_offset = 127,
// once for each zero_index value 0, 1 and 2.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
    for (uint32_t zi = 0; zi <= 2; ++zi) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(k_dim)
        .ks(3)
        .a_offset(127)
        .zero_index(zi)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41421
// Single run with m = 3, n = 4, k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41434
// Single run with m = 3, n = 4, k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41447
// Single run with m = 3, n = 4, k = 8 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41460 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
41461
41462
41463 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = 3, n = 4, k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41475
// Single run with m = 3, n = 4, k = 8 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41488
// With k = 8, runs every (n, m) pair with n in 1..4 and m in 1..3,
// with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(m_dim).n(n_dim).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41505
// With n = 4 and k = 8, runs m = 1, 2, 3 with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(m_dim).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41520
// With m = 3 and k = 8, runs n = 1, 2, 3, 4 with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(n_dim).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41535
// Runs m = 3, n = 4 for every k in 1..7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41549
// For each k in 1..7, runs every (n, m) pair with n in 1..4 and m in 1..3,
// with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t k_dim = 1; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41568
// Runs m = 3, n = 4 for every k in 9..15.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41582
// For each k in 9..15, runs every (n, m) pair with n in 1..4 and m in 1..3,
// with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t k_dim = 9; k_dim <= 15; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41601
// Runs m = 3, n = 4 for k = 16, 24, ..., 80 (step of 8).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(k_dim)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41615
// For k = 16, 24, ..., 80, runs every (n, m) pair with n in 1..4 and m in 1..3,
// with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t k_dim = 16; k_dim <= 80; k_dim += 8) {
    for (uint32_t n_dim = 1; n_dim < 5; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41634
// Runs m = 3 with n in 5..7; for each n, k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41650
// Runs m = 3 with n in 5..7 and k in 1, 10, ..., 37, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41667
// For n in 5..7 and k in 1, 10, ..., 37, runs m = 1, 2, 3,
// with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim <= 7; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41686
// Runs m = 3 with n = 8 and n = 12; for each n, k takes 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41702
// Runs m = 3 with n = 8 and n = 12 and k in 1, 10, ..., 37, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41719
// For n = 8 and n = 12 and k in 1, 10, ..., 37, runs m = 1, 2, 3,
// with iterations set to 1 per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 40; k_dim += 9) {
      for (uint32_t m_dim = 1; m_dim < 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41738
// Runs the 3x4c2 kernel on the full m=3, n=4 tile with ks(3) for
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41753
// Runs the 3x4c2 kernel with ks(3) for k in {1, 10, 19, 28, 37}, n in [1, 4]
// and m in [1, 3], with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41773
// Runs the 3x4c2 kernel with m=3 and ks(3) for n in [5, 7] and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41790
// Runs the 3x4c2 kernel with m=3 and ks(3) for n in {8, 12} and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41807
// Runs the 3x4c2 kernel with cm_stride(7) for k in {1, 10, 19, 28, 37},
// n in [1, 4] and m in [1, 3], with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
41827
// Runs the 3x4c2 kernel on the full m=3, n=4 tile with ks(3) and
// a_offset(127) for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41843
// Runs the 3x4c2 kernel on the full m=3, n=4 tile with ks(3) and
// a_offset(127), passing each zero_index in [0, 2] for every
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(1)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41862
// Single 3x4c2 case at m=3, n=4, k=8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41875
// Single 3x4c2 case at m=3, n=4, k=8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41888
// Single 3x4c2 case at m=3, n=4, k=8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(1)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41901 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
41902
41903
41904 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 3x4c2s4 case at m=3, n=4, k=8 with no additional tester options.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41916
// Single 3x4c2s4 case at m=3, n=4, k=8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
41929
// Runs the 3x4c2s4 kernel at k=8 for every n in [1, 4] and m in [1, 3],
// with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
41946
// Runs the 3x4c2s4 kernel at n=4, k=8 for every m in [1, 3], with
// iterations(1) per value.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41961
// Runs the 3x4c2s4 kernel at m=3, k=8 for every n in [1, 4], with
// iterations(1) per value.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41976
// Runs the 3x4c2s4 kernel on the full m=3, n=4 tile for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
41990
// Runs the 3x4c2s4 kernel for every k in [1, 7], n in [1, 4] and m in [1, 3],
// with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42009
// Runs the 3x4c2s4 kernel on the full m=3, n=4 tile for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42023
// Runs the 3x4c2s4 kernel for every k in [9, 15], n in [1, 4] and m in [1, 3],
// with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42042
// Runs the 3x4c2s4 kernel on the full m=3, n=4 tile for k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42056
// Runs the 3x4c2s4 kernel for k = 16, 24, ..., 80, n in [1, 4] and
// m in [1, 3], with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42075
// Runs the 3x4c2s4 kernel with m=3 for n in [5, 7] and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42091
// Runs the 3x4c2s4 kernel with m=3 and cn_stride(7) for n in [5, 7] and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42108
// Runs the 3x4c2s4 kernel for n in [5, 7], k in {1, 10, 19, 28, 37} and every
// m in [1, 3], with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42127
// Runs the 3x4c2s4 kernel with m=3 for n in {8, 12} and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42143
// Runs the 3x4c2s4 kernel with m=3 and cn_stride(7) for n in {8, 12} and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42160
// Runs the 3x4c2s4 kernel for n in {8, 12}, k in {1, 10, 19, 28, 37} and
// every m in [1, 3], with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42179
// Runs the 3x4c2s4 kernel on the full m=3, n=4 tile with ks(3) for
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42194
// Runs the 3x4c2s4 kernel with ks(3) for k in {1, 10, 19, 28, 37}, n in [1, 4]
// and m in [1, 3], with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42214
// Runs the 3x4c2s4 kernel with m=3 and ks(3) for n in [5, 7] and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42231
// Runs the 3x4c2s4 kernel with m=3 and ks(3) for n in {8, 12} and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42248
// Runs the 3x4c2s4 kernel with cm_stride(7) for k in {1, 10, 19, 28, 37},
// n in [1, 4] and m in [1, 3], with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(2).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42268
// Runs the 3x4c2s4 kernel on the full m=3, n=4 tile with ks(3) and
// a_offset(127) for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(2).sr(4)
      .m(3).n(4).k(kc)
      .ks(3)
      .a_offset(127)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42284
// Runs the 3x4c2s4 kernel on the full m=3, n=4 tile with ks(3) and
// a_offset(127), passing each zero_index in [0, 2] for every
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(2).sr(4)
        .m(3).n(4).k(kc)
        .ks(3)
        .a_offset(127)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42303
// Single 3x4c2s4 case at m=3, n=4, k=8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42316
// Single 3x4c2s4 case at m=3, n=4, k=8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42329
// Single 3x4c2s4 case at m=3, n=4, k=8 with cm_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C2S4__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(2).sr(4)
    .m(3).n(4).k(8)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42342 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
42343
42344
42345 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single 3x4c8 case at m=3, n=4, k=8 with no additional tester options.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42357
// Single 3x4c8 case at m=3, n=4, k=8 with cn_stride(7).
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(4).kr(8).sr(1)
    .m(3).n(4).k(8)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42370
// Runs the 3x4c8 kernel at k=8 for every n in [1, 4] and m in [1, 3],
// with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 3; ++mc) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42387
// Runs the 3x4c8 kernel at n=4, k=8 for every m in [1, 3], with
// iterations(1) per value.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_m) {
  for (uint32_t mc = 1; mc <= 3; ++mc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(mc).n(4).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42402
// Runs the 3x4c8 kernel at m=3, k=8 for every n in [1, 4], with
// iterations(1) per value.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_eq_8_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(nc).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42417
// Runs the 3x4c8 kernel on the full m=3, n=4 tile for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8) {
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42431
// Runs the 3x4c8 kernel for every k in [1, 7], n in [1, 4] and m in [1, 3],
// with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_lt_8_subtile) {
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42450
// Runs the 3x4c8 kernel on the full m=3, n=4 tile for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8) {
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42464
// Runs the 3x4c8 kernel for every k in [9, 15], n in [1, 4] and m in [1, 3],
// with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_gt_8_subtile) {
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42483
// Runs the 3x4c8 kernel on the full m=3, n=4 tile for k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42497
// Runs the 3x4c8 kernel for k = 16, 24, ..., 80, n in [1, 4] and
// m in [1, 3], with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, k_div_8_subtile) {
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42516
// Runs the 3x4c8 kernel with m=3 for n in [5, 7] and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42532
// Runs the 3x4c8 kernel with m=3 and cn_stride(7) for n in [5, 7] and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42549
// Runs the 3x4c8 kernel for n in [5, 7], k in {1, 10, 19, 28, 37} and every
// m in [1, 3], with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42568
// Runs the 3x4c8 kernel with m=3 for n in {8, 12} and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42584
// Runs the 3x4c8 kernel with m=3 and cn_stride(7) for n in {8, 12} and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42601
// Runs the 3x4c8 kernel for n in {8, 12}, k in {1, 10, 19, 28, 37} and every
// m in [1, 3], with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42620
// Runs the 3x4c8 kernel on the full m=3, n=4 tile with ks(3) for
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, small_kernel) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(kc)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42635
// Runs the 3x4c8 kernel with ks(3) for k in {1, 10, 19, 28, 37}, n in [1, 4]
// and m in [1, 3], with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42655
// Runs the 3x4c8 kernel with m=3 and ks(3) for n in [5, 7] and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42672
// Runs the 3x4c8 kernel with m=3 and ks(3) for n in {8, 12} and
// k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(nc).k(kc)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42689
// Runs the 3x4c8 kernel with cm_stride(7) for k in {1, 10, 19, 28, 37},
// n in [1, 4] and m in [1, 3], with iterations(1) per combination.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 3; ++mc) {
        GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42709
// Sweeps k = 1, 10, ..., 37 on a 3x4 tile with ks = 3 and a_offset = 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, a_offset) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester = GemmMicrokernelTester()
        .mr(3).nr(4).kr(8).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(127);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42725
// Sweeps k = 1, 10, ..., 37 and zero_index = 0, 1, 2 on a 3x4 tile with
// ks = 3 and a_offset = 127.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, zero) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t mz_index = 0; mz_index < 3; ++mz_index) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(3).nr(4).kr(8).sr(1)
          .m(3).n(4).k(k_size)
          .ks(3)
          .a_offset(127)
          .zero_index(mz_index);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42744
// Single run on a 3x4 tile with k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, qmin) {
  GemmMicrokernelTester tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42757
// Single run on a 3x4 tile with k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, qmax) {
  GemmMicrokernelTester tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42770
// Single run on a 3x4 tile with k = 8 and cm_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4C8__WASMSIMD_DOT16X2_LD128, strided_cm) {
  GemmMicrokernelTester tester = GemmMicrokernelTester()
      .mr(3).nr(4).kr(8).sr(1)
      .m(3).n(4).k(8)
      .cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42783 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
42784
42785
42786 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run on a 4x4 tile with k = 8.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8) {
  GemmMicrokernelTester tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42798
// Single run on a 4x4 tile with k = 8 and cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cn) {
  GemmMicrokernelTester tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
42811
// Runs every n in [1, 4] x m in [1, 4] with k = 8, a single iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(m_size).n(n_size).k(8)
          .iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42828
// Runs m = 1..4 with n = 4 and k = 8, a single iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(m_size).n(4).k(8)
        .iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42843
// Runs n = 1..4 with m = 4 and k = 8, a single iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_eq_8_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(n_size).k(8)
        .iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42858
// Sweeps k = 1..7 on a 4x4 tile.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    GemmMicrokernelTester tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42872
// Sweeps k = 1..7 and every n in [1, 4] x m in [1, 4], a single iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_lt_8_subtile) {
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42891
// Sweeps k = 9..15 on a 4x4 tile.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42905
// Sweeps k = 9..15 and every n in [1, 4] x m in [1, 4], a single iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_gt_8_subtile) {
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42924
// Sweeps k = 16, 24, ..., 80 on a 4x4 tile.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
42938
// Sweeps k = 16, 24, ..., 80 and every n in [1, 4] x m in [1, 4],
// a single iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, k_div_8_subtile) {
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
42957
// Sweeps n = 5, 6, 7 and k = 1, 10, ..., 37 with m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42973
// Sweeps n = 5, 6, 7 and k = 1, 10, ..., 37 with m = 4 and cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
42990
// Sweeps n = 5, 6, 7, k = 1, 10, ..., 37 and m = 1..4, a single iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43009
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 with m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43025
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 with m = 4 and cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43042
// Sweeps n = 8, 12, k = 1, 10, ..., 37 and m = 1..4, a single iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43061
// Sweeps k = 1, 10, ..., 37 on a 4x4 tile with ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, small_kernel) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_size)
        .ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
43076
// Sweeps k = 1, 10, ..., 37 and every n in [1, 4] x m in [1, 4] with ks = 3,
// a single iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43096
// Sweeps n = 5, 6, 7 and k = 1, 10, ..., 37 with m = 4 and ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43113
// Sweeps n = 8, 12 and k = 1, 10, ..., 37 with m = 4 and ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43130
// Sweeps k = 1, 10, ..., 37 and every n in [1, 4] x m in [1, 4] with
// cm_stride = 7, a single iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(8).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
            xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43150
// Sweeps k = 1, 10, ..., 37 on a 4x4 tile with ks = 3 and a_offset = 163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, a_offset) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    GemmMicrokernelTester tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(8).sr(1)
        .m(4).n(4).k(k_size)
        .ks(3)
        .a_offset(163);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
        xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
        xnn_qs8_requantize_fp32);
  }
}
43166
// Sweeps k = 1, 10, ..., 37 and zero_index = 0..3 on a 4x4 tile with
// ks = 3 and a_offset = 163.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, zero) {
  for (size_t k_size = 1; k_size <= 40; k_size += 9) {
    for (uint32_t mz_index = 0; mz_index < 4; ++mz_index) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(8).sr(1)
          .m(4).n(4).k(k_size)
          .ks(3)
          .a_offset(163)
          .zero_index(mz_index);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
          xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43185
// Single run on a 4x4 tile with k = 8 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmin) {
  GemmMicrokernelTester tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
43198
// Single run on a 4x4 tile with k = 8 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, qmax) {
  GemmMicrokernelTester tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
43211
// Single run on a 4x4 tile with k = 8 and cm_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4C8__WASMSIMD_DOT16X2_LD64, strided_cm) {
  GemmMicrokernelTester tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(8).sr(1)
      .m(4).n(4).k(8)
      .cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
      xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
      xnn_qs8_requantize_fp32);
}
43224 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
43225
43226
43227 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run on a 1x2 tile with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester tester = GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
43239
// Single run on a 1x2 tile with k = 1 and cn_stride = 5.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester tester = GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cn_stride(5);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
43252
// Runs n = 1, 2 with m = 1 (single-pass m loop) and k = 1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43269
// Runs m = 1 (single-pass loop) with n = 2 and k = 1, one iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester tester = GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(m_size).n(2).k(1)
        .iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
43284
// Runs n = 1, 2 with m = 1 and k = 1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    GemmMicrokernelTester tester = GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(n_size).k(1)
        .iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
43299
// Sweeps k = 2..9 on a 1x2 tile.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester = GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
43313
// Sweeps k = 2..9 and n = 1, 2 with m = 1 (single-pass m loop),
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester = GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43332
// Runs n = 3 (the only value in [3, 4)) for k = 1, 3, 5 with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43348
// Runs n = 3 for k = 1, 3, 5 with m = 1 and cn_stride = 5.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(5);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43365
// Runs n = 3 for k = 1, 3, 5 with m = 1 (single-pass m loop),
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester = GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43384
// Sweeps n = 4, 6 and k = 1, 3, 5 with m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43400
// Sweeps n = 4, 6 and k = 1, 3, 5 with m = 1 and cn_stride = 5.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(5);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43417
// Sweeps n = 4, 6 and k = 1, 3, 5 with m = 1 (single-pass m loop),
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester = GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43436
// Sweeps k = 1, 3, 5 on a 1x2 tile with ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester = GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(k_size)
        .ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
43451
// Sweeps k = 1, 3, 5 and n = 1, 2 with m = 1 (single-pass m loop) and ks = 3,
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester = GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43471
// Runs n = 3 for k = 1, 3, 5 with m = 1 and ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43488
// Sweeps n = 4, 6 and k = 1, 3, 5 with m = 1 and ks = 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43505
// Sweeps k = 1, 3, 5 and n = 1, 2 with m = 1 (single-pass m loop) and
// cm_stride = 5, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester = GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(5)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43525
// Sweeps k = 1, 3, 5 on a 1x2 tile with ks = 3 and a_offset = 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester = GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(k_size)
        .ks(3)
        .a_offset(7);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
43541
// Sweeps k = 1, 3, 5 with zero_index = 0 (single-pass loop) on a 1x2 tile,
// ks = 3 and a_offset = 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t mz_index = 0; mz_index < 1; ++mz_index) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(2).k(k_size)
          .ks(3)
          .a_offset(7)
          .zero_index(mz_index);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43560
// Single run on a 1x2 tile with k = 1 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmin) {
  GemmMicrokernelTester tester = GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
43573
// Single run on a 1x2 tile with k = 1 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, qmax) {
  GemmMicrokernelTester tester = GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
43586
// Single run on a 1x2 tile with k = 1 and cm_stride = 5.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__WASM_FMAGIC, strided_cm) {
  GemmMicrokernelTester tester = GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cm_stride(5);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
43599 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
43600
43601
43602 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run on a 1x4 tile with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1) {
  GemmMicrokernelTester tester = GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
43614
// Single run on a 1x4 tile with k = 1 and cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cn) {
  GemmMicrokernelTester tester = GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(1)
      .cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
43627
// Runs n = 1..4 with m = 1 (single-pass m loop) and k = 1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester tester = GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
43644
// Runs m = 1 (single-pass loop) with n = 4 and k = 1, one iteration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester tester = GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(m_size).n(4).k(1)
        .iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
43659
// Runs n = 1..4 with m = 1 and k = 1, one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester = GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n_size).k(1)
        .iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
43674
// Sweeps k = 2..9 on a 1x4 tile.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester = GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
43688
// Sweeps k = 2..9 and n = 1..4 with m = 1 (single-pass m loop),
// one iteration each.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester = GemmMicrokernelTester()
            .mr(1).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
43707
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4) {
  // m = 1; n in 5..7 (above nr = 4) crossed with k in {1, 3, 5}.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1).m(1).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
43723
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  // m = 1; n in 5..7 crossed with k in {1, 3, 5}, with cn_stride set to 7.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1).m(1).n(n_size).k(k_size).cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
43740
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_subtile) {
  // n in 5..7, k in {1, 3, 5}, m in 1..1; iterations(1) each.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1).m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
43759
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4) {
  // m = 1; n in {8, 12} (multiples of nr = 4) crossed with k in {1, 3, 5}.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1).m(1).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
43775
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_strided_cn) {
  // m = 1; n in {8, 12} crossed with k in {1, 3, 5}, with cn_stride set to 7.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1).m(1).n(n_size).k(k_size).cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
43792
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_subtile) {
  // n in {8, 12}, k in {1, 3, 5}, m in 1..1; iterations(1) each.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1).m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
43811
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, small_kernel) {
  // m = 1, n = 4, ks = 3; k in {1, 3, 5}.
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1).m(1).n(4).k(k_size).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
43826
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, small_kernel_subtile) {
  // ks = 3; k in {1, 3, 5}, n in 1..4, m in 1..1; iterations(1) each.
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1).m(m_size).n(n_size).k(k_size).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
43846
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  // m = 1, ks = 3; n in 5..7 crossed with k in {1, 3, 5}.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1).m(1).n(n_size).k(k_size).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
43863
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, n_div_4_small_kernel) {
  // m = 1, ks = 3; n in {8, 12} crossed with k in {1, 3, 5}.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1).m(1).n(n_size).k(k_size).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
43880
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm_subtile) {
  // cm_stride = 7; k in {1, 3, 5}, n in 1..4, m in 1..1; iterations(1) each.
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1).m(m_size).n(n_size).k(k_size).cm_stride(7).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
43900
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, a_offset) {
  // m = 1, n = 4, ks = 3, a_offset = 7; k in {1, 3, 5}.
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1).m(1).n(4).k(k_size).ks(3).a_offset(7);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
43916
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, zero) {
  // ks = 3, a_offset = 7; k in {1, 3, 5} crossed with zero_index in 0..0.
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1).m(1).n(4).k(k_size).ks(3).a_offset(7).zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
43935
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmin) {
  // m = 1, n = 4, k = 1 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1).m(1).n(4).k(1).qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
43948
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, qmax) {
  // m = 1, n = 4, k = 1 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1).m(1).n(4).k(1).qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
43961
TEST(QS8_IGEMM_MINMAX_FP32_1X4__WASM_FMAGIC, strided_cm) {
  // m = 1, n = 4, k = 1 with cm_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1).m(1).n(4).k(1).cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
43974 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
43975
43976
43977 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1) {
  // m = 2, n = 2 (matching mr x nr) at k = 1.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1).m(2).n(2).k(1);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
43989
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cn) {
  // m = 2, n = 2, k = 1 with cn_stride set to 5.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1).m(2).n(2).k(1).cn_stride(5);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
44002
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile) {
  // k = 1; n in 1..2 crossed with m in 1..2; iterations(1) each.
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1).m(m_size).n(n_size).k(1).iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
44019
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  // k = 1, n = 2; m swept over 1..2 with iterations(1).
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1).m(m_size).n(2).k(1).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
44034
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  // k = 1, m = 2; n swept over 1..2 with iterations(1).
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1).m(2).n(n_size).k(1).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
44049
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1) {
  // m = 2, n = 2; k swept over 2..9.
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1).m(2).n(2).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
44063
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, k_gt_1_subtile) {
  // Every (k, n, m) with k in 2..9, n in 1..2, m in 1..2; iterations(1) each.
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1).m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
44082
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2) {
  // m = 2; n in 3..3 (above nr = 2) crossed with k in {1, 3, 5}.
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1).m(2).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
44098
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  // m = 2; n in 3..3 crossed with k in {1, 3, 5}, with cn_stride set to 5.
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1).m(2).n(n_size).k(k_size).cn_stride(5);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
44115
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_subtile) {
  // n in 3..3, k in {1, 3, 5}, m in 1..2; iterations(1) each.
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1).m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
44134
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2) {
  // m = 2; n in {4, 6} (multiples of nr = 2) crossed with k in {1, 3, 5}.
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1).m(2).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
44150
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_strided_cn) {
  // m = 2; n in {4, 6} crossed with k in {1, 3, 5}, with cn_stride set to 5.
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1).m(2).n(n_size).k(k_size).cn_stride(5);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
44167
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_subtile) {
  // n in {4, 6}, k in {1, 3, 5}, m in 1..2; iterations(1) each.
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1).m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
44186
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, small_kernel) {
  // m = 2, n = 2, ks = 3; k in {1, 3, 5}.
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1).m(2).n(2).k(k_size).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
44201
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, small_kernel_subtile) {
  // ks = 3; k in {1, 3, 5}, n in 1..2, m in 1..2; iterations(1) each.
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1).m(m_size).n(n_size).k(k_size).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
44221
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  // m = 2, ks = 3; n in 3..3 crossed with k in {1, 3, 5}.
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1).m(2).n(n_size).k(k_size).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
44238
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, n_div_2_small_kernel) {
  // m = 2, ks = 3; n in {4, 6} crossed with k in {1, 3, 5}.
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1).m(2).n(n_size).k(k_size).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
44255
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm_subtile) {
  // cm_stride = 5; k in {1, 3, 5}, n in 1..2, m in 1..2; iterations(1) each.
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(2).kr(1).sr(1).m(m_size).n(n_size).k(k_size).cm_stride(5).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
44275
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, a_offset) {
  // m = 2, n = 2, ks = 3, a_offset = 13; k in {1, 3, 5}.
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(2).kr(1).sr(1).m(2).n(2).k(k_size).ks(3).a_offset(13);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
44291
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, zero) {
  // ks = 3, a_offset = 13; k in {1, 3, 5} crossed with zero_index in 0..1.
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(2).kr(1).sr(1).m(2).n(2).k(k_size).ks(3).a_offset(13).zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
44310
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmin) {
  // m = 2, n = 2, k = 1 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1).m(2).n(2).k(1).qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
44323
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, qmax) {
  // m = 2, n = 2, k = 1 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1).m(2).n(2).k(1).qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
44336
TEST(QS8_IGEMM_MINMAX_FP32_2X2__WASM_FMAGIC, strided_cm) {
  // m = 2, n = 2, k = 1 with cm_stride set to 5.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(2).kr(1).sr(1).m(2).n(2).k(1).cm_stride(5);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
44349 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
44350
44351
44352 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1) {
  // m = 2, n = 4 (matching mr x nr) at k = 1.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1).m(2).n(4).k(1);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
44364
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cn) {
  // m = 2, n = 4, k = 1 with cn_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1).m(2).n(4).k(1).cn_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
44377
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile) {
  // k = 1; n in 1..4 crossed with m in 1..2; iterations(1) each.
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1).m(m_size).n(n_size).k(1).iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
44394
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  // k = 1, n = 4; m swept over 1..2 with iterations(1).
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1).m(m_size).n(4).k(1).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
44409
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  // k = 1, m = 2; n swept over 1..4 with iterations(1).
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1).m(2).n(n_size).k(1).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
44424
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1) {
  // m = 2, n = 4; k swept over 2..9.
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1).m(2).n(4).k(k_size);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
44438
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, k_gt_1_subtile) {
  // Every (k, n, m) with k in 2..9, n in 1..4, m in 1..2; iterations(1) each.
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1).m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
44457
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4) {
  // m = 2; n in 5..7 (above nr = 4) crossed with k in {1, 3, 5}.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1).m(2).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
44473
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  // m = 2; n in 5..7 crossed with k in {1, 3, 5}, with cn_stride set to 7.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1).m(2).n(n_size).k(k_size).cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
44490
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_subtile) {
  // n in 5..7, k in {1, 3, 5}, m in 1..2; iterations(1) each.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1).m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
44509
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4) {
  // m = 2; n in {8, 12} (multiples of nr = 4) crossed with k in {1, 3, 5}.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1).m(2).n(n_size).k(k_size);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
44525
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_strided_cn) {
  // m = 2; n in {8, 12} crossed with k in {1, 3, 5}, with cn_stride set to 7.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1).m(2).n(n_size).k(k_size).cn_stride(7);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
44542
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_subtile) {
  // n in {8, 12}, k in {1, 3, 5}, m in 1..2; iterations(1) each.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1).m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
44561
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, small_kernel) {
  // m = 2, n = 4, ks = 3; k in {1, 3, 5}.
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1).m(2).n(4).k(k_size).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
44576
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, small_kernel_subtile) {
  // ks = 3; k in {1, 3, 5}, n in 1..4, m in 1..2; iterations(1) each.
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1).m(m_size).n(n_size).k(k_size).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
44596
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  // m = 2, ks = 3; n in 5..7 crossed with k in {1, 3, 5}.
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1).m(2).n(n_size).k(k_size).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
44613
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, n_div_4_small_kernel) {
  // m = 2, ks = 3; n in {8, 12} crossed with k in {1, 3, 5}.
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1).m(2).n(n_size).k(k_size).ks(3);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
44630
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm_subtile) {
  // cm_stride = 7; k in {1, 3, 5}, n in 1..4, m in 1..2; iterations(1) each.
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1).m(m_size).n(n_size).k(k_size).cm_stride(7).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
      }
    }
  }
}
44650
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, a_offset) {
  // m = 2, n = 4, ks = 3, a_offset = 13; k in {1, 3, 5}.
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1).m(2).n(4).k(k_size).ks(3).a_offset(13);
    tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
  }
}
44666
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, zero) {
  // ks = 3, a_offset = 13; k in {1, 3, 5} crossed with zero_index in 0..1.
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1).m(2).n(4).k(k_size).ks(3).a_offset(13).zero_index(zero_idx);
      tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
    }
  }
}
44685
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmin) {
  // m = 2, n = 4, k = 1 with qmin set to 128.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1).m(2).n(4).k(1).qmin(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
44698
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, qmax) {
  // m = 2, n = 4, k = 1 with qmax set to 128.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1).m(2).n(4).k(1).qmax(128);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
44711
TEST(QS8_IGEMM_MINMAX_FP32_2X4__WASM_FMAGIC, strided_cm) {
  // m = 2, n = 4, k = 1 with cm_stride set to 7.
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1).m(2).n(4).k(1).cm_stride(7);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
44724 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
44725
44726
44727 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1) {
  // m = 4, n = 2 (matching mr x nr) at k = 1.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).m(4).n(2).k(1);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
44739
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cn) {
  // m = 4, n = 2, k = 1 with cn_stride set to 5.
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1).m(4).n(2).k(1).cn_stride(5);
  tester.Test(xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic, xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params, xnn_qs8_requantize_fp32);
}
44752
// Sweeps n over [1, 2] and m over [1, 4] at k = 1, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44769
// Sweeps m over [1, 4] with n = 2 and k = 1, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(m_size).n(2).k(1)
        .iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
44784
// Sweeps n over [1, 2] with m = 4 and k = 1, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_size).k(1)
        .iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
44799
// Sweeps k over [2, 9] with m = 4 and n = 2.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
44813
// Sweeps k over [2, 9], n over [1, 2] and m over [1, 4], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
44832
// Uses n = 3 (the only value in [3, 4)) with k in {1, 3, 5} and m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44848
// Uses n = 3 with k in {1, 3, 5}, m = 4 and cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(5);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44865
// Uses n = 3 with k in {1, 3, 5} and m over [1, 4], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
44884
// Uses n in {4, 6} with k in {1, 3, 5} and m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44900
// Uses n in {4, 6} with k in {1, 3, 5}, m = 4 and cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(5);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44917
// Uses n in {4, 6} with k in {1, 3, 5} and m over [1, 4], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
44936
// Uses k in {1, 3, 5} with m = 4, n = 2 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_size)
        .ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
44951
// Uses k in {1, 3, 5}, n over [1, 2] and m over [1, 4] with ks set to 3 and
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
44971
// Uses n = 3 with k in {1, 3, 5}, m = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
44988
// Uses n in {4, 6} with k in {1, 3, 5}, m = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45005
// Uses k in {1, 3, 5}, n over [1, 2] and m over [1, 4] with cm_stride set to 5
// and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(5)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45025
// Uses k in {1, 3, 5} with m = 4, n = 2, ks set to 3 and a_offset set to 23.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_size)
        .ks(3)
        .a_offset(23);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45041
// Uses k in {1, 3, 5} and zero_index over [0, 3] with m = 4, n = 2, ks set to 3
// and a_offset set to 23.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(2).k(k_size)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45060
// Runs the 4x2 wasm_fmagic kernel with m = 4, n = 2, k = 1 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmin) {
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45073
// Runs the 4x2 wasm_fmagic kernel with m = 4, n = 2, k = 1 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, qmax) {
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45086
// Runs the 4x2 wasm_fmagic kernel with m = 4, n = 2, k = 1 and cm_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__WASM_FMAGIC, strided_cm) {
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cm_stride(5);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45099 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
45100
45101
45102 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the 4x4 wasm_fmagic kernel with m = 4, n = 4, k = 1 and no extra options.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45114
// Runs the 4x4 wasm_fmagic kernel with m = 4, n = 4, k = 1 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cn) {
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45127
// Sweeps n over [1, 4] and m over [1, 4] at k = 1, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45144
// Sweeps m over [1, 4] with n = 4 and k = 1, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(m_size).n(4).k(1)
        .iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45159
// Sweeps n over [1, 4] with m = 4 and k = 1, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_size).k(1)
        .iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45174
// Sweeps k over [2, 9] with m = 4 and n = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45188
// Sweeps k over [2, 9], n over [1, 4] and m over [1, 4], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45207
// Uses n over [5, 7] with k in {1, 3, 5} and m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45223
// Uses n over [5, 7] with k in {1, 3, 5}, m = 4 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45240
// Uses n over [5, 7] with k in {1, 3, 5} and m over [1, 4], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45259
// Uses n in {8, 12} with k in {1, 3, 5} and m = 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45275
// Uses n in {8, 12} with k in {1, 3, 5}, m = 4 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45292
// Uses n in {8, 12} with k in {1, 3, 5} and m over [1, 4], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45311
// Uses k in {1, 3, 5} with m = 4, n = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(k_size)
        .ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45326
// Uses k in {1, 3, 5}, n over [1, 4] and m over [1, 4] with ks set to 3 and
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45346
// Uses n over [5, 7] with k in {1, 3, 5}, m = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45363
// Uses n in {8, 12} with k in {1, 3, 5}, m = 4 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45380
// Uses k in {1, 3, 5}, n over [1, 4] and m over [1, 4] with cm_stride set to 7
// and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(4).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45400
// Uses k in {1, 3, 5} with m = 4, n = 4, ks set to 3 and a_offset set to 23.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(4).k(k_size)
        .ks(3)
        .a_offset(23);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45416
// Uses k in {1, 3, 5} and zero_index over [0, 3] with m = 4, n = 4, ks set to 3
// and a_offset set to 23.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      auto tester = GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(4).n(4).k(k_size)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45435
// Runs the 4x4 wasm_fmagic kernel with m = 4, n = 4, k = 1 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmin) {
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45448
// Runs the 4x4 wasm_fmagic kernel with m = 4, n = 4, k = 1 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, qmax) {
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45461
// Runs the 4x4 wasm_fmagic kernel with m = 4, n = 4, k = 1 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__WASM_FMAGIC, strided_cm) {
  auto tester = GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(1)
      .cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
45474 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
45475
45476
// Runs the 1x2 scalar_imagic kernel with m = 1, n = 2, k = 1 and no extra options.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1) {
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
45488
// Runs the 1x2 scalar_imagic kernel with m = 1, n = 2, k = 1 and cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cn) {
  auto tester = GemmMicrokernelTester()
      .mr(1).nr(2).kr(1).sr(1)
      .m(1).n(2).k(1)
      .cn_stride(5);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
45501
// Sweeps n over [1, 2] and m over [1, 1] at k = 1, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45518
// Sweeps m over [1, 1] with n = 2 and k = 1, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(m_size).n(2).k(1)
        .iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45533
// Sweeps n over [1, 2] with m = 1 and k = 1, with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(n_size).k(1)
        .iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45548
// Sweeps k over [2, 9] with m = 1 and n = 2.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(k_size);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45562
// Sweeps k over [2, 9], n over [1, 2] and m over [1, 1], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45581
// Uses n = 3 (the only value in [3, 4)) with k in {1, 3, 5} and m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45597
// Uses n = 3 with k in {1, 3, 5}, m = 1 and cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(5);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45614
// Uses n = 3 with k in {1, 3, 5} and m over [1, 1], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45633
// Uses n in {4, 6} with k in {1, 3, 5} and m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_size).k(k_size);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45649
// Uses n in {4, 6} with k in {1, 3, 5}, m = 1 and cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(5);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45666
// Uses n in {4, 6} with k in {1, 3, 5} and m over [1, 1], with iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45685
// Uses k in {1, 3, 5} with m = 1, n = 2 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(k_size)
        .ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45700
// Uses k in {1, 3, 5}, n over [1, 2] and m over [1, 1] with ks set to 3 and
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45720
// Uses n = 3 with k in {1, 3, 5}, m = 1 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45737
// Uses n in {4, 6} with k in {1, 3, 5}, m = 1 and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      auto tester = GemmMicrokernelTester()
          .mr(1).nr(2).kr(1).sr(1)
          .m(1).n(n_size).k(k_size)
          .ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45754
// Uses k in {1, 3, 5}, n over [1, 2] and m over [1, 1] with cm_stride set to 5
// and iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        auto tester = GemmMicrokernelTester()
            .mr(1).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(5)
            .iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45774
// Uses k in {1, 3, 5} with m = 1, n = 2, ks set to 3 and a_offset set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    auto tester = GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(k_size)
        .ks(3)
        .a_offset(7);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45790
// 1x2 scalar-imagic IGEMM, full 1x2 tile with ks = 3 and a_offset = 7; sweeps
// zero_index over [0, 1) for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, zero) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(2).kr(1).sr(1)
        .m(1).n(2).k(depth)
        .ks(3)
        .a_offset(7)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45809
// 1x2 scalar-imagic IGEMM, full 1x2 tile, k = 1, with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
45822
// 1x2 scalar-imagic IGEMM, full 1x2 tile, k = 1, with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
45835
// 1x2 scalar-imagic IGEMM, full 1x2 tile, k = 1, with cm_stride = 5.
TEST(QS8_IGEMM_MINMAX_FP32_1X2__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(2).kr(1).sr(1)
    .m(1).n(2).k(1)
    .cm_stride(5)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
45848
45849
// 1x4 scalar-imagic IGEMM, full 1x4 tile (m = mr, n = nr) with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
45861
// 1x4 scalar-imagic IGEMM, full 1x4 tile, k = 1, with cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
45874
// 1x4 scalar-imagic IGEMM with k = 1: every (m, n) with m in [1, 1] and n in [1, 4],
// one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45891
// 1x4 scalar-imagic IGEMM with k = 1 and n = 4: sweeps m over [1, 1],
// one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(rows).n(4).k(1)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45906
// 1x4 scalar-imagic IGEMM with k = 1 and m = 1: sweeps n over [1, 4],
// one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(cols).k(1)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45921
// 1x4 scalar-imagic IGEMM, full 1x4 tile, sweeping k over [2, 9].
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
45935
// 1x4 scalar-imagic IGEMM: k in [2, 9] crossed with every (m, n), m in [1, 1] and
// n in [1, 4]; one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
45954
// 1x4 scalar-imagic IGEMM: n in [5, 7] (above nr = 4), k in {1, 3, 5}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45970
// 1x4 scalar-imagic IGEMM with cn_stride = 7: n in [5, 7], k in {1, 3, 5}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
45987
// 1x4 scalar-imagic IGEMM: n in [5, 7], k in {1, 3, 5}, m in [1, 1];
// one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46006
// 1x4 scalar-imagic IGEMM: n in {8, 12} (multiples of nr = 4), k in {1, 3, 5}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46022
// 1x4 scalar-imagic IGEMM with cn_stride = 7: n in {8, 12}, k in {1, 3, 5}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46039
// 1x4 scalar-imagic IGEMM: n in {8, 12}, k in {1, 3, 5}, m in [1, 1];
// one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46058
// 1x4 scalar-imagic IGEMM, full 1x4 tile with ks = 3, k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46073
// 1x4 scalar-imagic IGEMM with ks = 3: k in {1, 3, 5} crossed with every (m, n),
// m in [1, 1] and n in [1, 4]; one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46093
// 1x4 scalar-imagic IGEMM with ks = 3: n in [5, 7], k in {1, 3, 5}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46110
// 1x4 scalar-imagic IGEMM with ks = 3: n in {8, 12}, k in {1, 3, 5}, m = 1.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46127
// 1x4 scalar-imagic IGEMM with cm_stride = 7: k in {1, 3, 5} crossed with every
// (m, n), m in [1, 1] and n in [1, 4]; one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46147
// 1x4 scalar-imagic IGEMM, full 1x4 tile with ks = 3 and a_offset = 7, k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, a_offset) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(depth)
      .ks(3)
      .a_offset(7)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46163
// 1x4 scalar-imagic IGEMM, full 1x4 tile with ks = 3 and a_offset = 7; sweeps
// zero_index over [0, 1) for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, zero) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 1; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(4).k(depth)
        .ks(3)
        .a_offset(7)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46182
// 1x4 scalar-imagic IGEMM, full 1x4 tile, k = 1, with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
46195
// 1x4 scalar-imagic IGEMM, full 1x4 tile, k = 1, with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
46208
// 1x4 scalar-imagic IGEMM, full 1x4 tile, k = 1, with cm_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_1X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cm_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
46221
46222
// 2x2 scalar-imagic IGEMM, full 2x2 tile (m = mr, n = nr) with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
46234
// 2x2 scalar-imagic IGEMM, full 2x2 tile, k = 1, with cn_stride = 5.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .cn_stride(5)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
46247
// 2x2 scalar-imagic IGEMM with k = 1: every (m, n) with m in [1, 2] and n in [1, 2],
// one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46264
// 2x2 scalar-imagic IGEMM with k = 1 and n = 2: sweeps m over [1, 2],
// one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(rows).n(2).k(1)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46279
// 2x2 scalar-imagic IGEMM with k = 1 and m = 2: sweeps n over [1, 2],
// one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(cols).k(1)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46294
// 2x2 scalar-imagic IGEMM, full 2x2 tile, sweeping k over [2, 9].
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46308
// 2x2 scalar-imagic IGEMM: k in [2, 9] crossed with every (m, n), m in [1, 2] and
// n in [1, 2]; one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46327
// 2x2 scalar-imagic IGEMM: n = 3 (one more than nr = 2), k in {1, 3, 5}, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46343
// 2x2 scalar-imagic IGEMM with cn_stride = 5: n = 3, k in {1, 3, 5}, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_strided_cn) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(5)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46360
// 2x2 scalar-imagic IGEMM: n = 3, k in {1, 3, 5}, m in [1, 2];
// one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_subtile) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46379
// 2x2 scalar-imagic IGEMM: n in {4, 6} (multiples of nr = 2), k in {1, 3, 5}, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46395
// 2x2 scalar-imagic IGEMM with cn_stride = 5: n in {4, 6}, k in {1, 3, 5}, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_strided_cn) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(5)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46412
// 2x2 scalar-imagic IGEMM: n in {4, 6}, k in {1, 3, 5}, m in [1, 2];
// one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_subtile) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46431
// 2x2 scalar-imagic IGEMM, full 2x2 tile with ks = 3, k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(depth)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46446
// 2x2 scalar-imagic IGEMM with ks = 3: k in {1, 3, 5} crossed with every (m, n),
// m in [1, 2] and n in [1, 2]; one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .ks(3)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46466
// 2x2 scalar-imagic IGEMM with ks = 3: n = 3, k in {1, 3, 5}, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_gt_2_small_kernel) {
  for (uint32_t cols = 3; cols < 4; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46483
// 2x2 scalar-imagic IGEMM with ks = 3: n in {4, 6}, k in {1, 3, 5}, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, n_div_2_small_kernel) {
  for (uint32_t cols = 4; cols <= 6; cols += 2) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .ks(3)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46500
// 2x2 scalar-imagic IGEMM with cm_stride = 5: k in {1, 3, 5} crossed with every
// (m, n), m in [1, 2] and n in [1, 2]; one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(2).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(5)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46520
// 2x2 scalar-imagic IGEMM, full 2x2 tile with ks = 3 and a_offset = 13, k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, a_offset) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(2).kr(1).sr(1)
      .m(2).n(2).k(depth)
      .ks(3)
      .a_offset(13)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46536
// 2x2 scalar-imagic IGEMM, full 2x2 tile with ks = 3 and a_offset = 13; sweeps
// zero_index over [0, 2) for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, zero) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
        .mr(2).nr(2).kr(1).sr(1)
        .m(2).n(2).k(depth)
        .ks(3)
        .a_offset(13)
        .zero_index(zero_idx)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46555
// 2x2 scalar-imagic IGEMM, full 2x2 tile, k = 1, with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .qmin(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
46568
// 2x2 scalar-imagic IGEMM, full 2x2 tile, k = 1, with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .qmax(128)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
46581
// 2x2 scalar-imagic IGEMM, full 2x2 tile, k = 1, with cm_stride = 5.
TEST(QS8_IGEMM_MINMAX_FP32_2X2__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(2).kr(1).sr(1)
    .m(2).n(2).k(1)
    .cm_stride(5)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
46594
46595
// 2x4 scalar-imagic IGEMM, full 2x4 tile (m = mr, n = nr) with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
46607
// 2x4 scalar-imagic IGEMM, full 2x4 tile, k = 1, with cn_stride = 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cn_stride(7)
    .Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
      xnn_qs8_requantize_fp32);
}
46620
// 2x4 scalar-imagic IGEMM with k = 1: every (m, n) with m in [1, 2] and n in [1, 4],
// one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46637
// 2x4 scalar-imagic IGEMM with k = 1 and n = 4: sweeps m over [1, 2],
// one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(rows).n(4).k(1)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46652
// 2x4 scalar-imagic IGEMM with k = 1 and m = 2: sweeps n over [1, 4],
// one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(cols).k(1)
      .iterations(1)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46667
// 2x4 scalar-imagic IGEMM, full 2x4 tile, sweeping k over [2, 9].
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(depth)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46681
// 2x4 scalar-imagic IGEMM: k in [2, 9] crossed with every (m, n), m in [1, 2] and
// n in [1, 4]; one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46700
// 2x4 scalar-imagic IGEMM: n in [5, 7] (above nr = 4), k in {1, 3, 5}, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46716
// 2x4 scalar-imagic IGEMM with cn_stride = 7: n in [5, 7], k in {1, 3, 5}, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46733
// 2x4 scalar-imagic IGEMM: n in [5, 7], k in {1, 3, 5}, m in [1, 2];
// one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46752
// 2x4 scalar-imagic IGEMM: n in {8, 12} (multiples of nr = 4), k in {1, 3, 5}, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46768
// 2x4 scalar-imagic IGEMM with cn_stride = 7: n in {8, 12}, k in {1, 3, 5}, m = 2.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
46785
// 2x4 scalar-imagic IGEMM: n in {8, 12}, k in {1, 3, 5}, m in [1, 2];
// one tester iteration per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
46804
// 2x4 scalar-imagic IGEMM, full 2x4 tile with ks = 3, k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(depth)
      .ks(3)
      .Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
        xnn_qs8_requantize_fp32);
  }
}
46819
// 2x4 scalar imagic kernel: ks = 3 with k = 1, 3, 5 and every (m, n) in
// [1, 2] x [1, 4]; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3).iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
46839
// 2x4 scalar imagic kernel: ks = 3, m = 2, n = 5, 6, 7 (above nr = 4),
// swept over k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
46856
// 2x4 scalar imagic kernel: ks = 3, m = 2, n = 8 and 12, swept over
// k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
46873
// 2x4 scalar imagic kernel: cm_stride = 7 with k = 1, 3, 5 and every (m, n)
// in [1, 2] x [1, 4]; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7).iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
46893
// 2x4 scalar imagic kernel: full 2x4 tile with ks = 3 and a_offset = 13,
// swept over k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(k_size)
        .ks(3).a_offset(13)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
              xnn_qs8_requantize_fp32);
  }
}
46909
// 2x4 scalar imagic kernel: full 2x4 tile with ks = 3 and a_offset = 13,
// for k = 1, 3, 5 and zero_index set to each of 0 and 1.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 2; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(4).k(k_size)
          .ks(3).a_offset(13).zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
46928
// 2x4 scalar imagic kernel: full 2x4 tile, k = 1, with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
}
46941
// 2x4 scalar imagic kernel: full 2x4 tile, k = 1, with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
}
46954
// 2x4 scalar imagic kernel: full 2x4 tile, k = 1, with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_2X4__SCALAR_IMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
            xnn_qs8_requantize_fp32);
}
46967
46968
// 3x2 scalar fmagic kernel: full 3x2 tile with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
46980
// 3x2 scalar fmagic kernel: full 3x2 tile, k = 1, with cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
46993
// 3x2 scalar fmagic kernel: k = 1 for every (m, n) in [1, 3] x [1, 2];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47010
// 3x2 scalar fmagic kernel: k = 1, n = 2, and every m in [1, 3];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(m_size).n(2).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47025
// 3x2 scalar fmagic kernel: k = 1, m = 3, and every n in [1, 2];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_size).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47040
// 3x2 scalar fmagic kernel: full 3x2 tile for every k in [2, 9].
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47054
// 3x2 scalar fmagic kernel: every k in [2, 9] combined with every (m, n) in
// [1, 3] x [1, 2]; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47073
// 3x2 scalar fmagic kernel: m = 3 with n = 3 (the only value in [3, 4)),
// swept over k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47089
// 3x2 scalar fmagic kernel: m = 3, n = 3, k = 1, 3, 5, with cn_stride set
// to 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(5)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47106
// 3x2 scalar fmagic kernel: n = 3, k = 1, 3, 5, and every m in [1, 3];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47125
// 3x2 scalar fmagic kernel: m = 3 with n = 4 and 6 (multiples of nr = 2),
// swept over k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47141
// 3x2 scalar fmagic kernel: m = 3, n = 4 and 6, k = 1, 3, 5, with cn_stride
// set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(5)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47158
// 3x2 scalar fmagic kernel: n = 4 and 6, k = 1, 3, 5, and every m in [1, 3];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47177
// 3x2 scalar fmagic kernel: full 3x2 tile with ks = 3, swept over k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47192
// 3x2 scalar fmagic kernel: ks = 3 with k = 1, 3, 5 and every (m, n) in
// [1, 3] x [1, 2]; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3).iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47212
// 3x2 scalar fmagic kernel: ks = 3, m = 3, n = 3, swept over k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47229
// 3x2 scalar fmagic kernel: ks = 3, m = 3, n = 4 and 6, swept over
// k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47246
// 3x2 scalar fmagic kernel: cm_stride = 5 with k = 1, 3, 5 and every (m, n)
// in [1, 3] x [1, 2]; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(5).iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47266
// 3x2 scalar fmagic kernel: full 3x2 tile with ks = 3 and a_offset = 17,
// swept over k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_size)
        .ks(3).a_offset(17)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47282
// 3x2 scalar fmagic kernel: full 3x2 tile with ks = 3 and a_offset = 17,
// for k = 1, 3, 5 and zero_index set to each of 0, 1, 2.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(2).k(k_size)
          .ks(3).a_offset(17).zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47301
// 3x2 scalar fmagic kernel: full 3x2 tile, k = 1, with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
47314
// 3x2 scalar fmagic kernel: full 3x2 tile, k = 1, with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
47327
// 3x2 scalar fmagic kernel: full 3x2 tile, k = 1, with cm_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
47340
47341
// 3x2 scalar lrintf kernel: full 3x2 tile with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
47353
// 3x2 scalar lrintf kernel: full 3x2 tile, k = 1, with cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
47366
// 3x2 scalar lrintf kernel: k = 1 for every (m, n) in [1, 3] x [1, 2];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47383
// 3x2 scalar lrintf kernel: k = 1, n = 2, and every m in [1, 3];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(m_size).n(2).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
47398
// 3x2 scalar lrintf kernel: k = 1, m = 3, and every n in [1, 2];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(n_size).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
47413
// 3x2 scalar lrintf kernel: full 3x2 tile for every k in [2, 9].
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
47427
// 3x2 scalar lrintf kernel: every k in [2, 9] combined with every (m, n) in
// [1, 3] x [1, 2]; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47446
// 3x2 scalar lrintf kernel: m = 3 with n = 3 (the only value in [3, 4)),
// swept over k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47462
// 3x2 scalar lrintf kernel: m = 3, n = 3, k = 1, 3, 5, with cn_stride set
// to 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(5)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47479
// 3x2 scalar lrintf kernel: n = 3, k = 1, 3, 5, and every m in [1, 3];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47498
// 3x2 scalar lrintf kernel: m = 3 with n = 4 and 6 (multiples of nr = 2),
// swept over k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47514
// 3x2 scalar lrintf kernel: m = 3, n = 4 and 6, k = 1, 3, 5, with cn_stride
// set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(5)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47531
// 3x2 scalar lrintf kernel: n = 4 and 6, k = 1, 3, 5, and every m in [1, 3];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47550
// 3x2 scalar lrintf kernel: full 3x2 tile with ks = 3, swept over k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
47565
// 3x2 scalar lrintf kernel: ks = 3 with k = 1, 3, 5 and every (m, n) in
// [1, 3] x [1, 2]; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3).iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47585
// 3x2 scalar lrintf kernel: ks = 3, m = 3, n = 3, swept over k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47602
// 3x2 scalar lrintf kernel: ks = 3, m = 3, n = 4 and 6, swept over
// k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47619
// 3x2 scalar lrintf kernel: cm_stride = 5 with k = 1, 3, 5 and every (m, n)
// in [1, 3] x [1, 2]; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(5).iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
                  xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47639
// 3x2 scalar lrintf kernel: full 3x2 tile with ks = 3 and a_offset = 17,
// swept over k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(2).kr(1).sr(1)
        .m(3).n(2).k(k_size)
        .ks(3).a_offset(17)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
  }
}
47655
// 3x2 scalar lrintf kernel: full 3x2 tile with ks = 3 and a_offset = 17,
// for k = 1, 3, 5 and zero_index set to each of 0, 1, 2.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(2).kr(1).sr(1)
          .m(3).n(2).k(k_size)
          .ks(3).a_offset(17).zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47674
// 3x2 scalar lrintf kernel: full 3x2 tile, k = 1, with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
47687
// 3x2 scalar lrintf kernel: full 3x2 tile, k = 1, with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
47700
// 3x2 scalar lrintf kernel: full 3x2 tile, k = 1, with cm_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(2).kr(1).sr(1)
      .m(3).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
}
47713
47714
// 3x4 scalar fmagic kernel: full 3x4 tile with k = 1.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
47726
// 3x4 scalar fmagic kernel: full 3x4 tile, k = 1, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
}
47739
// 3x4 scalar fmagic kernel: k = 1 for every (m, n) in [1, 3] x [1, 4];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47756
// 3x4 scalar fmagic kernel: k = 1, n = 4, and every m in [1, 3];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(m_size).n(4).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47771
// 3x4 scalar fmagic kernel: k = 1, m = 3, and every n in [1, 4];
// each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(n_size).k(1)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47786
// 3x4 scalar fmagic kernel: full 3x4 tile for every k in [2, 9].
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(k_size)
        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
  }
}
47800
// 3x4 scalar fmagic kernel: every k in [2, 9] combined with every (m, n) in
// [1, 3] x [1, 4]; each configuration is run with iterations(1).
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                  xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                  xnn_qs8_requantize_fp32);
      }
    }
  }
}
47819
// 3x4 scalar fmagic kernel: m = 3 with n = 5, 6, 7 (above nr = 4),
// swept over k = 1, 3, 5.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47835
// 3x4 scalar fmagic kernel: m = 3, n = 5, 6, 7, k = 1, 3, 5, with cn_stride
// set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
    }
  }
}
47852
// n in [5, 7], k in {1, 3, 5}, and every m in [1, 3]; iterations(1) per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
47871
// n in {8, 12} (multiples of nr=4) with k in {1, 3, 5}; m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
47887
// n in {8, 12} with k in {1, 3, 5}, m fixed at 3, and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
47904
// n in {8, 12}, k in {1, 3, 5}, and every m in [1, 3]; iterations(1) per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
47923
// Full 3x4 tile with ks set to 3, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
47938
// ks set to 3 for k in {1, 3, 5}, every n in [1, 4] and m in [1, 3];
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
47958
// n in [5, 7] with k in {1, 3, 5}, m fixed at 3, and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
47975
// n in {8, 12} with k in {1, 3, 5}, m fixed at 3, and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
47992
// cm_stride set to 7 for k in {1, 3, 5}, every n in [1, 4] and m in [1, 3];
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
48012
// Full 3x4 tile with ks set to 3 and a_offset set to 17, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(17)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
48028
// Same setup as the a_offset case (ks=3, a_offset=17), additionally sweeping
// zero_index over [0, 2] for each k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(4).k(k_size)
          .ks(3)
          .a_offset(17)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48047
// Full 3x4 tile with k=1 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
48060
// Full 3x4 tile with k=1 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
48073
// Full 3x4 tile with k=1 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
48086
48087
// Single run on a full 3x4 tile (m=3, n=4) with k=1.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
48099
// Full 3x4 tile with k=1 and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cn_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
48112
// k=1 for every n in [1, 4] and m in [1, 3]; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48129
// k=1 and n=4 for every m in [1, 3]; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(m_size).n(4).k(1)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
48144
// k=1 and m=3 for every n in [1, 4]; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(n_size).k(1)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
48159
// Full 3x4 tile (m=3, n=4) for every k in [2, 9].
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
48173
// Every k in [2, 9] combined with every n in [1, 4] and m in [1, 3];
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
48192
// n in [5, 7] (beyond nr=4) with k in {1, 3, 5}; m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48208
// n in [5, 7] with k in {1, 3, 5}, m fixed at 3, and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48225
// n in [5, 7], k in {1, 3, 5}, and every m in [1, 3]; iterations(1) per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
48244
// n in {8, 12} (multiples of nr=4) with k in {1, 3, 5}; m fixed at 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48260
// n in {8, 12} with k in {1, 3, 5}, m fixed at 3, and cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(7)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48277
// n in {8, 12}, k in {1, 3, 5}, and every m in [1, 3]; iterations(1) per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
48296
// Full 3x4 tile with ks set to 3, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
48311
// ks set to 3 for k in {1, 3, 5}, every n in [1, 4] and m in [1, 3];
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
48331
// n in [5, 7] with k in {1, 3, 5}, m fixed at 3, and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48348
// n in {8, 12} with k in {1, 3, 5}, m fixed at 3, and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48365
// cm_stride set to 7 for k in {1, 3, 5}, every n in [1, 4] and m in [1, 3];
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(4).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(7)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
                xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
48385
// Full 3x4 tile with ks set to 3 and a_offset set to 17, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(3).nr(4).kr(1).sr(1)
        .m(3).n(4).k(k_size)
        .ks(3)
        .a_offset(17)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
48401
// Same setup as the a_offset case (ks=3, a_offset=17), additionally sweeping
// zero_index over [0, 2] for each k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 3; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(3).nr(4).kr(1).sr(1)
          .m(3).n(4).k(k_size)
          .ks(3)
          .a_offset(17)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48420
// Full 3x4 tile with k=1 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
48433
// Full 3x4 tile with k=1 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
48446
// Full 3x4 tile with k=1 and cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_3X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester()
      .mr(3).nr(4).kr(1).sr(1)
      .m(3).n(4).k(1)
      .cm_stride(7)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
48459
48460
// Single run on a full 4x2 tile (m=4, n=2) with k=1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
48472
// Full 4x2 tile with k=1 and cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
48485
// k=1 for every n in [1, 2] and m in [1, 4]; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48502
// k=1 and n=2 for every m in [1, 4]; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(m_size).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
48517
// k=1 and m=4 for every n in [1, 2]; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_size).k(1)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
48532
// Full 4x2 tile (m=4, n=2) for every k in [2, 9].
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
48546
// Every k in [2, 9] combined with every n in [1, 2] and m in [1, 4];
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
48565
// n=3 (the only value in [3, 4), beyond nr=2) with k in {1, 3, 5}; m fixed at 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48581
// n=3 with k in {1, 3, 5}, m fixed at 4, and cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_strided_cn) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(5)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48598
// n=3, k in {1, 3, 5}, and every m in [1, 4]; iterations(1) per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_subtile) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
48617
// n in {4, 6} (multiples of nr=2) with k in {1, 3, 5}; m fixed at 4.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48633
// n in {4, 6} with k in {1, 3, 5}, m fixed at 4, and cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_strided_cn) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(5)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48650
// n in {4, 6}, k in {1, 3, 5}, and every m in [1, 4]; iterations(1) per
// configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
48669
// Full 4x2 tile with ks set to 3, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, small_kernel) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_size)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
48684
// ks set to 3 for k in {1, 3, 5}, every n in [1, 2] and m in [1, 4];
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
48704
// n=3 with k in {1, 3, 5}, m fixed at 4, and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_gt_2_small_kernel) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48721
// n in {4, 6} with k in {1, 3, 5}, m fixed at 4, and ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, n_div_2_small_kernel) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .ks(3)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48738
// cm_stride set to 5 for k in {1, 3, 5}, every n in [1, 2] and m in [1, 4];
// iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .cm_stride(5)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
                xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
                xnn_qs8_requantize_fp32);
      }
    }
  }
}
48758
// Full 4x2 tile with ks set to 3 and a_offset set to 23, for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, a_offset) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(k_size)
        .ks(3)
        .a_offset(23)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
  }
}
48774
// Same setup as the a_offset case (ks=3, a_offset=23), additionally sweeping
// zero_index over [0, 3] for each k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, zero) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(2).k(k_size)
          .ks(3)
          .a_offset(23)
          .zero_index(zero_idx)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
              xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48793
// Full 4x2 tile with k=1 and qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmin(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
48806
// Full 4x2 tile with k=1 and qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmax(128)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
48819
// Full 4x2 tile with k=1 and cm_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cm_stride(5)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
}
48832
48833
// Single run on a full 4x2 tile (m=4, n=2) with k=1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
48845
// Full 4x2 tile with k=1 and cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cn_stride(5)
      .Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
}
48858
// k=1 for every n in [1, 2] and m in [1, 4]; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_size).n(n_size).k(1)
          .iterations(1)
          .Test(
              xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
              xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
              xnn_qs8_requantize_fp32);
    }
  }
}
48875
// k=1 and n=2 for every m in [1, 4]; iterations(1) per configuration.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(m_size).n(2).k(1)
        .iterations(1)
        .Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
  }
}
48890
// k=1 and m=4 for every n in [1, 2]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(n_val).k(1).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
48905
// Full 4x2 tile for every k in [2, 10).
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
48919
// Every k in [2, 10) combined with every n in [1, 2] and m in [1, 4];
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
48938
// n over [3, 4) (i.e. n=3 only) with m=4 and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48954
// n over [3, 4) (i.e. n=3 only), m=4, k in {1, 3, 5}, with cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_strided_cn) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).cn_stride(5);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
48971
// n over [3, 4) (i.e. n=3 only), k in {1, 3, 5}, every m in [1, 4];
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_subtile) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
48990
// n in {4, 6} with m=4 and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49006
// n in {4, 6}, m=4, k in {1, 3, 5}, with cn_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_strided_cn) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).cn_stride(5);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49023
// n in {4, 6}, k in {1, 3, 5}, every m in [1, 4]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_subtile) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49042
// Full 4x2 tile with ks set to 3 for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(k_val).ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
49057
// ks set to 3 for k in {1, 3, 5}, every n in [1, 2] and m in [1, 4];
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49077
// n over [3, 4) (i.e. n=3 only), m=4, k in {1, 3, 5}, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_gt_2_small_kernel) {
  for (uint32_t n_val = 3; n_val < 4; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49094
// n in {4, 6}, m=4, k in {1, 3, 5}, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, n_div_2_small_kernel) {
  for (uint32_t n_val = 4; n_val <= 6; n_val += 2) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49111
// cm_stride set to 5 for k in {1, 3, 5}, every n in [1, 2] and m in [1, 4];
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 2; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).cm_stride(5).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49131
// Full 4x2 tile with ks=3 and a_offset=23 for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(k_val).ks(3).a_offset(23);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
49147
// Full 4x2 tile with ks=3 and a_offset=23 for k in {1, 3, 5}, sweeping
// zero_index over [0, 4).
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(2).k(k_val).ks(3).a_offset(23).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49166
// Full 4x2 tile at k=1 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1).qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
49179
// Full 4x2 tile at k=1 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1).qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
49192
// Full 4x2 tile at k=1 with cm_stride set to 5.
TEST(QS8_IGEMM_MINMAX_FP32_4X2__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1).cm_stride(5);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
49205
49206
// Full 4x4 tile at k=1; no optional tester settings are applied.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
49218
// Full 4x4 tile at k=1 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1).cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
49231
// k=1 for every n in [1, 4] and m in [1, 4]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      // A fresh tester is built for each (n, m) pair.
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(m_val).n(n_val).k(1).iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49248
// k=1 and n=4 for every m in [1, 4]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(m_val).n(4).k(1).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
49263
// k=1 and m=4 for every n in [1, 4]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(n_val).k(1).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
49278
// Full 4x4 tile for every k in [2, 10).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
49292
// Every k in [2, 10) combined with every n in [1, 4] and m in [1, 4];
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49311
// n in [5, 8) with m=4 and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49327
// n in [5, 8), m=4, k in {1, 3, 5}, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49344
// n in [5, 8), k in {1, 3, 5}, every m in [1, 4]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49363
// n in {8, 12} with m=4 and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49379
// n in {8, 12}, m=4, k in {1, 3, 5}, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49396
// n in {8, 12}, k in {1, 3, 5}, every m in [1, 4]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49415
// Full 4x4 tile with ks set to 3 for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_val).ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
49430
// ks set to 3 for k in {1, 3, 5}, every n in [1, 4] and m in [1, 4];
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49450
// n in [5, 8), m=4, k in {1, 3, 5}, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_gt_4_small_kernel) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49467
// n in {8, 12}, m=4, k in {1, 3, 5}, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, n_div_4_small_kernel) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49484
// cm_stride set to 7 for k in {1, 3, 5}, every n in [1, 4] and m in [1, 4];
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
            xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49504
// Full 4x4 tile with ks=3 and a_offset=23 for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_val).ks(3).a_offset(23);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
        xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
        xnn_qs8_requantize_fp32);
  }
}
49520
// Full 4x4 tile with ks=3 and a_offset=23 for k in {1, 3, 5}, sweeping
// zero_index over [0, 4).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(4).k(k_val).ks(3).a_offset(23).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
          xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49539
// Full 4x4 tile at k=1 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1).qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
49552
// Full 4x4 tile at k=1 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1).qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
49565
// Full 4x4 tile at k=1 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_FMAGIC, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1).cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
      xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
      xnn_qs8_requantize_fp32);
}
49578
49579
// Full 4x4 tile at k=1; no optional tester settings are applied.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
49591
// Full 4x4 tile at k=1 with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cn) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1).cn_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
49604
// k=1 for every n in [1, 4] and m in [1, 4]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
      // A fresh tester is built for each (n, m) pair.
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(m_val).n(n_val).k(1).iterations(1);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49621
// k=1 and n=4 for every m in [1, 4]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_m) {
  for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(m_val).n(4).k(1).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
49636
// k=1 and m=4 for every n in [1, 4]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_eq_1_subtile_n) {
  for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(n_val).k(1).iterations(1);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
49651
// Full 4x4 tile for every k in [2, 10).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_val);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
49665
// Every k in [2, 10) combined with every n in [1, 4] and m in [1, 4];
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, k_gt_1_subtile) {
  for (size_t k_val = 2; k_val < 10; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49684
// n in [5, 8) with m=4 and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49700
// n in [5, 8), m=4, k in {1, 3, 5}, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_strided_cn) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49717
// n in [5, 8), k in {1, 3, 5}, every m in [1, 4]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_subtile) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49736
// n in {8, 12} with m=4 and k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49752
// n in {8, 12}, m=4, k in {1, 3, 5}, with cn_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_strided_cn) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).cn_stride(7);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49769
// n in {8, 12}, k in {1, 3, 5}, every m in [1, 4]; iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_subtile) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49788
// Full 4x4 tile with ks set to 3 for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_val).ks(3);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
49803
// ks set to 3 for k in {1, 3, 5}, every n in [1, 4] and m in [1, 4];
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, small_kernel_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).ks(3).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49823
// n in [5, 8), m=4, k in {1, 3, 5}, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_gt_4_small_kernel) {
  for (uint32_t n_val = 5; n_val < 8; ++n_val) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49840
// n in {8, 12}, m=4, k in {1, 3, 5}, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, n_div_4_small_kernel) {
  for (uint32_t n_val = 8; n_val <= 12; n_val += 4) {
    for (size_t k_val = 1; k_val <= 5; k_val += 2) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_val).k(k_val).ks(3);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49857
// cm_stride set to 7 for k in {1, 3, 5}, every n in [1, 4] and m in [1, 4];
// iterations set to 1.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm_subtile) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t n_val = 1; n_val <= 4; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 4; ++m_val) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_val).n(n_val).k(k_val).cm_stride(7).iterations(1);
        tester.Test(
            xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
            xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
            xnn_qs8_requantize_fp32);
      }
    }
  }
}
49877
// Full 4x4 tile with ks=3 and a_offset=23 for k in {1, 3, 5}.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, a_offset) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_val).ks(3).a_offset(23);
    tester.Test(
        xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
        xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
        xnn_qs8_requantize_fp32);
  }
}
49893
// Full 4x4 tile with ks=3 and a_offset=23 for k in {1, 3, 5}, sweeping
// zero_index over [0, 4).
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, zero) {
  for (size_t k_val = 1; k_val <= 5; k_val += 2) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(4).k(k_val).ks(3).a_offset(23).zero_index(zero_idx);
      tester.Test(
          xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
          xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
          xnn_qs8_requantize_fp32);
    }
  }
}
49912
// Full 4x4 tile at k=1 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmin) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1).qmin(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
49925
// Full 4x4 tile at k=1 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, qmax) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1).qmax(128);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
49938
// Full 4x4 tile at k=1 with cm_stride set to 7.
TEST(QS8_IGEMM_MINMAX_FP32_4X4__SCALAR_LRINTF, strided_cm) {
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1).cm_stride(7);
  tester.Test(
      xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
      xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
      xnn_qs8_requantize_fp32);
}
49951